Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fast rcnn #5

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
456cf08
Initial version of Fast-RCNN. Work in progress
fmassa Jul 9, 2015
d6a5caa
Add ROIDataLayer, still incomplete
fmassa Aug 7, 2015
23198fd
Merge branch 'master' of https://github.com/fmassa/object-detection.t…
fmassa Aug 8, 2015
722c433
Commiting what we have for the moment
fmassa Aug 13, 2015
58074c5
Updating BatchProviderROI
fmassa Aug 13, 2015
f1c256f
Basics of Fast-RCNN seems to be working
fmassa Aug 15, 2015
3bf8968
Almost working
fmassa Aug 15, 2015
7c9ad2e
Basic test for FRCNN
fmassa Aug 16, 2015
5819a83
Fix bug in ROIPooling
fmassa Aug 17, 2015
f245f29
Merge branch 'master' of https://github.com/fmassa/object-detection.t…
fmassa Aug 17, 2015
cf614a2
Merge branch 'fast-rcnn' of https://github.com/fmassa/object-detectio…
fmassa Aug 17, 2015
bbe3dd2
Improve training script for frcnn
fmassa Aug 18, 2015
05d8ad6
Cleaning up and fix in test
fmassa Aug 19, 2015
3e10973
Add gitignore
fmassa Aug 20, 2015
ffaf0f2
Fix unitialized memory in BatchProviderROI
fmassa Aug 22, 2015
6836ca2
Fix bug in BatchProviderROI
fmassa Aug 22, 2015
4ce9107
Add flip in training and test code
fmassa Aug 23, 2015
aac63f6
Force rois to be in cpu
fmassa Aug 23, 2015
7b8b8e7
Fix wrong batch-size
fmassa Aug 30, 2015
4a46bd0
Modifications in the training procedure
fmassa Aug 31, 2015
4d0d46a
fix bug in nms
szagoruyko Sep 22, 2015
2f3c563
Merge pull request #8 from szagoruyko/fast-rcnn
fmassa Sep 22, 2015
154313c
Fix BatchProviderROI incorrect super constructor invokation
Sep 23, 2015
9c3e73f
Merge pull request #9 from 0wu/fast-rcnn
fmassa Sep 23, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*~
*.swp
cachedir/*
158 changes: 158 additions & 0 deletions BatchProviderROI.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
-- Batch provider that serves whole images together with regions of interest
-- (ROIs) sampled from them. Extends nnf.BatchProvider.
local BatchProviderROI, parent = torch.class('nnf.BatchProviderROI','nnf.BatchProvider')

--- Create a provider reading from `dataset`.
-- The dataset is handed to the parent constructor inside an options table.
-- @param dataset  dataset object (must offer size() and getImage(i), which
--                 are used by the other methods of this class)
function BatchProviderROI:__init(dataset)
  parent.__init(self, {dataset = dataset})

  -- default (empty) options for the image pre-processing step
  self.image_transformer = nnf.ImageTransformer{}
  -- target length of the shorter image side, and the cap applied to the
  -- longer side after rescaling (both in pixels, see getImages)
  self.scale = 600
  self.max_size = 1000
  -- number of images that make up one mini-batch
  self.imgs_per_batch = 2
end

-- setup() is the same as in the parent class (nnf.BatchProvider), so it is not overridden here

--- Choose the images of the next mini-batch and list their candidate windows.
--
-- Images are visited following a random permutation of the dataset; a new
-- permutation is drawn as soon as the current one cannot provide a full
-- batch any more.
--
-- @return fg_windows  table with one entry per selected image; each entry is
--                     a list of {image index, row} pairs built from
--                     self.bboxes[image][1]
-- @return bg_windows  same layout, built from self.bboxes[image][0]
-- @return opts        table with fields img_idx (selected image indices),
--                     img_idx_end (number of selected images) and do_flip
--                     (FloatTensor of 0/1 flags, one per image)
function BatchProviderROI:permuteIdx()
  local total_img = self.dataset:size()
  local imgs_per_batch = self.imgs_per_batch

  -- fail early with a readable message instead of an out-of-range slice
  assert(total_img >= imgs_per_batch,
         'BatchProviderROI: dataset has fewer images than imgs_per_batch')

  -- math.huge forces a shuffle on the very first call
  self._cur = self._cur or math.huge

  -- The batch uses positions _cur .. _cur+imgs_per_batch-1 of the
  -- permutation, so reshuffle only when the LAST of them is out of range
  -- (comparing _cur+imgs_per_batch would skip the final full batch).
  if self._cur + imgs_per_batch - 1 > total_img then
    self._perm = torch.randperm(total_img)
    self._cur = 1
  end

  local img_idx = self._perm[{{self._cur,self._cur + imgs_per_batch - 1}}]
  self._cur = self._cur + imgs_per_batch

  local img_idx_end = imgs_per_batch

  -- self.bboxes is not filled in this file (presumably by the parent's
  -- setup); key 0 feeds the background list and key 1 the foreground list.
  local function listWindows(curr_idx,key)
    local windows = {}
    local boxes = self.bboxes[curr_idx][key]
    if boxes then
      for j=1,boxes:size(1) do
        windows[j] = {curr_idx,j}
      end
    end
    return windows
  end

  local fg_windows = {}
  local bg_windows = {}
  for i=1,img_idx_end do
    local curr_idx = img_idx[i]
    bg_windows[i] = listWindows(curr_idx,0)
    fg_windows[i] = listWindows(curr_idx,1)
  end

  -- independent coin flip per image: 1 means "mirror horizontally"
  local do_flip = torch.FloatTensor(imgs_per_batch):random(0,1)
  local opts = {img_idx=img_idx,img_idx_end=img_idx_end,do_flip=do_flip}
  return fg_windows,bg_windows,opts
end

-- Randomly pick up to `num_each` windows of one image and append them to
-- `rois` / `labels`.
--   windows   list of {image index, row} pairs for this image
--   key       0 or 1, selects self.bboxes[image][key]
--   im        position of the image inside the mini-batch (stored in the roi)
--   im_scale  factor applied to the box coordinates
--   flip      when true, boxes are mirrored horizontally
--   im_width  width of the rescaled image, used for mirroring
local function sampleWindows(self,windows,key,num_each,im,im_scale,flip,im_width,rois,labels)
  local num_windows = #windows
  -- torch.randperm requires a strictly positive size, so an image without
  -- any window of this kind must be skipped explicitly
  if num_windows == 0 then
    return
  end
  local window_idx = torch.randperm(num_windows)
  local end_idx = math.min(num_each,num_windows)
  for i=1,end_idx do
    local window = windows[window_idx[i]]
    local curr_idx = window[1]
    local position = window[2]
    local box = self.bboxes[curr_idx][key][position]
    -- columns 2..5 hold the box coordinates; move them to the rescaled
    -- image while keeping 1-based pixel indices
    local dd = box[{{2,5}}]:clone()
    dd:add(-1):mul(im_scale):add(1)
    if flip then
      -- mirror x1/x2 around the image width
      local tt = dd[1]
      dd[1] = im_width-dd[3] +1
      dd[3] = im_width-tt +1
    end
    table.insert(rois,{im,dd[1],dd[2],dd[3],dd[4]})
    -- column 6 is used as the training label
    table.insert(labels,box[6])
  end
end

--- Sample background and foreground ROIs for every image of the batch.
--
-- @param fg_windows  per-image foreground window lists (see permuteIdx)
-- @param bg_windows  per-image background window lists (see permuteIdx)
-- @param im_scales   per-image scale factors
-- @param do_flip     per-image flags, 1 meaning the image was mirrored
-- @param im_sizes    per-image rescaled sizes; entry [2] is used as width
-- @return rois    FloatTensor with rows {batch position, x1, y1, x2, y2}
-- @return labels  IntTensor with one label per roi
function BatchProviderROI:selectBBoxes(fg_windows,bg_windows,im_scales,do_flip,im_sizes)
  -- split the per-batch budgets evenly between the images
  local fg_num_each = torch.round(self.fg_num_each/self.imgs_per_batch)
  local bg_num_each = torch.round(self.bg_num_each/self.imgs_per_batch)

  local rois = {}
  local labels = {}
  for im=1,self.imgs_per_batch do
    local im_scale = im_scales[im]
    local flip = do_flip[im] == 1
    local im_width = im_sizes[im][2]
    -- background first, then foreground, as in the original sampling order
    sampleWindows(self,bg_windows[im],0,bg_num_each,im,im_scale,flip,im_width,rois,labels)
    sampleWindows(self,fg_windows[im],1,fg_num_each,im,im_scale,flip,im_width,rois,labels)
  end
  rois = torch.FloatTensor(rois)
  labels = torch.IntTensor(labels)
  return rois, labels
end

-- Load, pre-process, optionally mirror and rescale the images of a batch,
-- then pack them into `images`.
--   img_ids  1D tensor of dataset indices
--   images   tensor that is resized to (num_images, 3, maxH, maxW) and filled
--   do_flip  per-image flags, 1 meaning "mirror horizontally"
-- Returns the table of scale factors and an IntTensor of rescaled
-- {height, width} pairs, one row per image.
local function getImages(self,img_ids,images,do_flip)
  local dataset = self.dataset
  local num_images = img_ids:size(1)

  local scaled_imgs = {}
  local scaled_sizes = {}
  local scale_factors = {}

  for i=1,num_images do
    local im = self.image_transformer:preprocess(dataset:getImage(img_ids[i]))
    if do_flip[i] == 1 then
      im = image.hflip(im)
    end

    -- size of a single channel, i.e. {height, width}
    local hw = im[1]:size()
    local short_side = math.min(hw[1],hw[2])
    local long_side = math.max(hw[1],hw[2])

    -- bring the shorter side to self.scale unless that would push the
    -- longer side beyond self.max_size
    local im_scale = self.scale/short_side
    if torch.round(im_scale*long_side) > self.max_size then
      im_scale = self.max_size/long_side
    end

    local new_h = torch.round(hw[1]*im_scale)
    local new_w = torch.round(hw[2]*im_scale)
    scaled_imgs[i] = image.scale(im,new_w,new_h)
    scaled_sizes[i] = {new_h,new_w}
    scale_factors[i] = im_scale
  end

  -- one tensor for the whole batch; smaller images sit in the top-left
  -- corner and the remainder stays zero
  local sizes = torch.IntTensor(scaled_sizes)
  local largest = sizes:max(1)[1]
  images:resize(num_images,3,largest[1],largest[2]):zero()
  for i=1,num_images do
    local src = scaled_imgs[i]
    images[i][{{},{1,src:size(2)},{1,src:size(3)}}]:copy(src)
  end
  return scale_factors,sizes
end


--- Assemble one training mini-batch.
--
-- @param batches  optional table {images, rois} of tensors to reuse; created
--                 as two FloatTensors when omitted
-- @param targets  optional tensor to reuse for the labels
-- @return batches  {images, rois}: the padded image tensor and the roi rows
-- @return targets  label of each roi
function BatchProviderROI:getBatch(batches,targets)
  -- per-batch budgets for foreground and background rois
  -- (fg_fraction and batch_size are not set in this file)
  self.fg_num_each = self.fg_fraction * self.batch_size
  self.bg_num_each = self.batch_size - self.fg_num_each

  local fg_windows,bg_windows,opts = self:permuteIdx()

  batches = batches or {torch.FloatTensor(),torch.FloatTensor()}
  targets = targets or torch.FloatTensor()

  -- fills batches[1] with the images and reports how each one was rescaled
  local im_scales, im_sizes = getImages(self,opts.img_idx,batches[1],opts.do_flip)
  local rois,labels = self:selectBBoxes(fg_windows,bg_windows,im_scales,opts.do_flip,im_sizes)

  batches[2]:resizeAs(rois):copy(rois)
  targets:resize(labels:size()):copy(labels)

  return batches, targets
end
42 changes: 42 additions & 0 deletions FRCNN.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
-- Feature provider skeleton for Fast R-CNN.
local FRCNN = torch.class('nnf.FRCNN')

--- Store the dataset and the default rescaling settings.
-- @param dataset  dataset object queried by getFeatures
function FRCNN:__init(dataset)
  self.dataset = dataset

  -- list of target sizes for the shorter image side (getScale reads entry 1)
  self.scale = {600}
  -- limit for the longer image side
  self.max_dim = 1000
  self.randomscale = true

  self.step_standard = 16

  -- settings left disabled by the author:
  --self.sz_conv_standard = 13
  --self.offset0 = 21
  --self.offset = 6.5
  --self.inputArea = 224^2
end

--- Compute the factor by which an image should be rescaled.
--
-- The shorter side is brought to self.scale[1]; if the longer side would
-- then exceed self.max_dim, the factor is reduced so that the longer side
-- equals self.max_dim. This is the same rule the getImages helpers of
-- BatchProviderROI and ImageDetect apply (the previous version only looked
-- at the un-scaled longer side, so the result could overshoot max_dim).
--
-- @param I  indexable whose entries 2 and 3 are the two spatial extents
--           (presumably the size of a CxHxW image -- confirm with callers)
-- @return scale factor (number)
function FRCNN:getScale(I)
  local min_size = math.min(I[2],I[3])
  local max_size = math.max(I[2],I[3])
  local scale = self.scale[1]/min_size
  -- round to the nearest pixel before comparing with the limit
  if math.floor(scale*max_size + 0.5) > self.max_dim then
    scale = self.max_dim/max_size
  end
  return scale
end

--- Map 1-based box coordinates to a rescaled image.
-- Coordinates are shifted to start at 0, multiplied by `scale` and shifted
-- back, so that coordinate 1 stays at 1.
-- @param bboxes  box coordinates
-- @param scale   scale factor
-- @return the projected coordinates
function FRCNN:projectBBoxes(bboxes,scale)
  local zero_based = bboxes - 1
  local scaled = zero_based * scale
  return scaled + 1
end

--- Unfinished: loads image `i` and its proposals but produces no features.
-- NOTE(review): nothing is returned yet, the `flip` branch is empty, and
-- prepareImage is not defined in the visible source -- confirm it exists
-- before relying on this method.
-- @param i     dataset index of the image
-- @param flip  flag intended to request a mirrored image (currently unused)
function FRCNN:getFeatures(i,flip)
  local img = self.dataset:getImage(i)
  local proposals = self.dataset:attachProposals(i)
  img = prepareImage(img)
  if flip then
    -- TODO: flipping is not implemented
  end
end
88 changes: 88 additions & 0 deletions ImageDetect.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
-- Runs a detection model on a single image and a set of boxes.
local ImageDetect = torch.class('nnf.ImageDetect')

--- Wrap `model` for test-time detection.
-- @param model  network whose forward() takes {images, rois}
function ImageDetect:__init(model)
  self.model = model

  -- pre-processing options handed to nnf.ImageTransformer
  local transformer_opts = {
    mean_pix = {102.9801,115.9465,122.7717},
    raw_scale = 255,
    swap = {3,2,1},
  }
  self.image_transformer = nnf.ImageTransformer(transformer_opts)

  -- target sizes for the shorter side (one image per entry) and the limit
  -- for the longer side
  self.scale = {600}
  self.max_size = 1000

  -- applied to the model output in detect()
  self.sm = nn.SoftMax():cuda()
end


-- Pre-process `im`, rescale it once per entry of self.scale and pack the
-- results into `images`.
--   images  tensor that is resized to (num_scales, 3, maxH, maxW) and filled
--   im      input image
-- Returns the table of scale factors, one per entry of self.scale.
local function getImages(self,images,im)
  local num_scales = #self.scale

  local imgs = {}
  local im_sizes = {}
  local im_scales = {}

  im = self.image_transformer:preprocess(im)

  -- size of a single channel, i.e. {height, width}
  local im_size = im[1]:size()
  local im_size_min = math.min(im_size[1],im_size[2])
  local im_size_max = math.max(im_size[1],im_size[2])
  for i=1,num_scales do
    -- bring the shorter side to self.scale[i] unless the longer side would
    -- exceed self.max_size
    local im_scale = self.scale[i]/im_size_min
    if torch.round(im_scale*im_size_max) > self.max_size then
      im_scale = self.max_size/im_size_max
    end
    -- Round to whole pixels, exactly as the training-time getImages does;
    -- otherwise fractional sizes reach image.scale and get truncated when
    -- converted to an IntTensor below.
    local im_s = {torch.round(im_size[1]*im_scale),torch.round(im_size[2]*im_scale)}
    table.insert(imgs,image.scale(im,im_s[2],im_s[1]))
    table.insert(im_sizes,im_s)
    table.insert(im_scales,im_scale)
  end
  -- create single tensor with all images, padding with zero for different sizes
  im_sizes = torch.IntTensor(im_sizes)
  local max_shape = im_sizes:max(1)[1]
  images:resize(num_scales,3,max_shape[1],max_shape[2]):zero()
  for i=1,num_scales do
    images[i][{{},{1,imgs[i]:size(2)},{1,imgs[i]:size(3)}}]:copy(imgs[i])
  end
  return im_scales
end

-- Project boxes given in original-image coordinates onto the rescaled
-- images produced by getImages.
--   im_rois  N x 4 tensor of boxes [x1,y1,x2,y2]
--   scales   table of scale factors, one per rescaled image
-- Returns an N x 5 FloatTensor with rows {image index, x1, y1, x2, y2}.
-- With a single scale every box is assigned to image 1. With several
-- scales each box goes to the scale at which its area is closest to
-- 224 x 224 (the previous version computed this choice but returned an
-- empty tensor, and multiplied widths and heights as a dot product).
local function project_im_rois(im_rois,scales)
  local num_rois = im_rois:size(1)
  local rois = torch.FloatTensor(num_rois,5)
  if #scales > 1 then
    local scales_t = torch.FloatTensor(scales)
    local widths = im_rois[{{},3}] - im_rois[{{},1}] + 1
    local heights = im_rois[{{},4}] - im_rois[{{},2}] + 1

    -- element-wise product; `*` between two 1D tensors would be a dot product
    local areas = torch.cmul(widths,heights):float()
    -- (N x 1) * (1 x S) outer product: area of every box at every scale
    local scaled_areas = areas:view(-1,1) * torch.pow(scales_t:view(1,-1),2)
    local diff_areas = torch.abs(scaled_areas - 224 * 224)
    local _, levels = diff_areas:min(2)
    levels = levels:view(-1)

    for i=1,num_rois do
      local level = levels[i]
      rois[i][1] = level
      rois[i][{{2,5}}]:copy(im_rois[i]):add(-1):mul(scales[level]):add(1)
    end
  else
    rois[{{},1}]:fill(1)
    rois[{{},{2,5}}]:copy(im_rois):add(-1):mul(scales[1]):add(1)
  end

  return rois
end

--- Score every box of an image.
-- Supposes `boxes` is in [x1,y1,x2,y2] format.
-- @param im     input image
-- @param boxes  tensor of boxes, one per row
-- @return the softmax of the model output, converted to a float tensor
function ImageDetect:detect(im,boxes)
  -- build the CPU inputs: rescaled image(s), then boxes projected onto them
  local images = torch.FloatTensor()
  local im_scales = getImages(self,images,im)
  local rois = project_im_rois(boxes,im_scales)

  -- move both inputs to the GPU
  local images_cuda = torch.CudaTensor()
  local rois_cuda = torch.CudaTensor()
  images_cuda:resize(images:size()):copy(images)
  rois_cuda:resize(rois:size()):copy(rois)

  local scores = self.model:forward({images_cuda,rois_cuda})
  local output = self.sm:forward(scores):float()
  -- NOTE: an unfinished, disabled per-scale loop over the boxes used to
  -- live here; it had no effect on the result.
  return output
end
86 changes: 86 additions & 0 deletions ROIPooling.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
-- Pools every region of interest of a feature map to a fixed W x H grid,
-- using one nn.SpatialAdaptiveMaxPooling module per region.
local ROIPooling,parent = torch.class('nnf.ROIPooling','nn.Module')

--- Create the layer.
-- @param W  output width of every pooled region
-- @param H  output height of every pooled region
function ROIPooling:__init(W,H)
  parent.__init(self)
  self.W = W
  self.H = H
  -- factor applied to the roi coordinates before pooling
  self.spatial_scale = 1
  -- pooling modules, created lazily in updateOutput (one per roi)
  self.pooler = {}
  -- gradient is only produced for the first input (the feature map)
  self.gradInput = {torch.Tensor()}
end

--- Set the factor that maps roi coordinates onto the feature map.
-- @param scale  multiplier applied to the roi coordinates in updateOutput
-- @return self, so that calls can be chained
function ROIPooling:setSpatialScale(scale)
  self.spatial_scale = scale

  return self
end

--- Forward pass.
--
-- @param input  table {data, rois}: `data` is indexed as
--               (image, channel, y, x); `rois` has rows
--               {image index, x1, y1, x2, y2}
-- @return tensor of size (num_rois, channels, H, W)
--
-- The roi coordinates are scaled by self.spatial_scale, rounded and clamped
-- to the extent of `data`. This is done on a private copy kept in
-- self._rois: the caller's tensor is left untouched, so running forward
-- twice on the same input no longer scales the boxes twice.
function ROIPooling:updateOutput(input)
  local data = input[1]
  local rois = input[2]

  local num_rois = rois:size(1)
  local s = data:size()
  local ss = s:size(1)
  self.output:resize(num_rois,s[ss-2],self.H,self.W)

  local scaled = rois:clone()
  scaled[{{},{2,5}}]:add(-1):mul(self.spatial_scale):add(1):round()
  -- x coordinates are limited by the width, y coordinates by the height
  scaled[{{},2}]:cmin(s[ss])
  scaled[{{},3}]:cmin(s[ss-1])
  scaled[{{},4}]:cmin(s[ss])
  scaled[{{},5}]:cmin(s[ss-1])

  -- element access is faster if not a cuda tensor
  if scaled:type() == 'torch.CudaTensor' then
    scaled = scaled:float()
  end
  -- remembered for updateGradInput
  self._rois = scaled

  -- was `output:type()`, which reads an undefined global
  if not self._type then self._type = self.output:type() end

  -- grow the pool of pooling modules up to one per roi
  if #self.pooler < num_rois then
    local diff = num_rois - #self.pooler
    for i=1,diff do
      table.insert(self.pooler,nn.SpatialAdaptiveMaxPooling(self.W,self.H):type(self._type))
    end
  end

  for i=1,num_rois do
    local roi = scaled[i]
    local im_idx = roi[1]
    local im = data[{im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}}]
    self.output[i] = self.pooler[i]:updateOutput(im)
  end
  return self.output
end

--- Backward pass.
--
-- Accumulates, for every roi, the gradient of its pooling module into the
-- matching window of the feature-map gradient. Uses the scaled rois stored
-- by the preceding updateOutput call.
--
-- @param input       same table {data, rois} that was given to updateOutput
-- @param gradOutput  gradient w.r.t. the output, one slice per roi
-- @return self.gradInput, a table whose only entry is the gradient w.r.t. data
function ROIPooling:updateGradInput(input,gradOutput)
  local data = input[1]
  local rois = self._rois
  assert(rois, 'ROIPooling: updateOutput must be called before updateGradInput')
  local num_rois = rois:size(1)
  self.gradInput[1]:resizeAs(data):zero()

  for i=1,num_rois do
    local roi = rois[i]
    local im_idx = roi[1]
    local r = {im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}}
    local im = data[r]
    local g = self.pooler[i]:updateGradInput(im,gradOutput[i])
    -- overlapping rois add up
    self.gradInput[1][r]:add(g)
  end
  return self.gradInput
end

--- Convert the module and all of its pooling sub-modules to `type`.
--
-- Called without a type it returns the remembered type and changes
-- nothing; previously such a call reset self._type to nil and returned the
-- module itself.
--
-- @param type  tensor type name, e.g. 'torch.CudaTensor'
-- @param ...   extra arguments, forwarded unchanged to the parent and the
--              sub-modules
-- @return self when converting, the stored type name otherwise
function ROIPooling:type(type,...)
  if not type then
    return self._type
  end
  parent.type(self,type,...)
  for i=1,#self.pooler do
    self.pooler[i]:type(type,...)
  end
  -- used by updateOutput when it creates further pooling modules
  self._type = type
  return self
end
Loading