Commit 49f2f04

A few more changes

1 parent d0ff38b

7 files changed: +306 -129 lines


README.md (+23 -9)
@@ -6,8 +6,19 @@ Heavily inspired from the `Optimizer` from https://github.com/facebook/fb-caffe-
 
 ## How does it work?
 
-It goes over the network and verify which buffers can be reused. Currently only
-the `output` of each module are reused.
+It goes over the network and verifies which buffers can be reused.
+Currently it only supports evaluation mode, but training mode will be added soon.
+
+Here is a list of currently tested networks (numbers are for the CPU version, with a batch size of 1, in the format (total memory used, memory used for the outputs)):
+
+| Network            | Before optimization | After optimization | Relative savings |
+| ------------------ | :-----------------: | :----------------: | :--------------: |
+| alexnet            | (972MB, 6MB)        | (933MB, 1.5MB)     | (4%, 75%)        |
+| vgg16              | (2311MB, 69MB)      | (2119MB, 30MB)     | (8%, 55%)        |
+| googlenet          | (505MB, 69MB)       | (337MB, 30MB)      | (33%, 57%)       |
+| resnet 110 (cifar) | (113MB, 16MB)       | (32MB, 4MB)        | (72%, 73%)       |
+
+Note that most of the used memory goes to the convolution buffers from `nn`.
 
 ## Visualizing the memory reuse
 
@@ -25,6 +36,8 @@ having to use `nngraph`.
 Let's have a look:
 
 ```lua
+-- some handy models are defined in optnet.models,
+-- like alexnet, googlenet and resnet
 models = require 'optnet.models'
 modelname = 'googlenet'
 net, input = models[modelname]()
@@ -34,7 +47,6 @@ generateGraph = require 'optnet.graphgen'
 g = generateGraph(net, input)
 
 graph.dot(g,modelname,modelname)
-
 ```
 
 This generates the following graph:
@@ -49,11 +61,13 @@ models = require 'optnet.models'
 modelname = 'googlenet'
 net, input = models[modelname]()
 
+opts = {inplace=true, reuseBuffers=true}
+
 generateGraph = require 'optnet.graphgen'
 
 optnet = require 'optnet'
 
-optnet.optimizeMemory(net, input)
+optnet.optimizeMemory(net, input, opts)
 
 g = generateGraph(net, input)
 
@@ -71,22 +85,22 @@ Here is an example
 
 ```lua
 optnet = require 'optnet'
-utils = require 'optnet.utils'
-usedMemory = utils.usedMemory
 
 models = require 'optnet.models'
 modelname = 'googlenet'
 net, input = models[modelname]()
 
-mem1 = usedMemory(net, input)
+opts = {countBuffers=true}
+
+mem1 = optnet.countUsedMemory(net, input, opts)
 
 optnet.optimizeMemory(net, input)
 
-mem2 = usedMemory(net, input)
+mem2 = optnet.countUsedMemory(net, input, opts)
 
 optnet.removeOptimization(net)
 
-mem3 = usedMemory(net, input)
+mem3 = optnet.countUsedMemory(net, input, opts)
 
 print('Before optimization : '.. mem1/1024/1024 .. ' MBytes')
 print('After optimization : '.. mem2/1024/1024 .. ' MBytes')
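Taken together, the README's measurement and optimization snippets compose end to end. Below is a minimal sketch assuming only what the diff above shows: the `optnet.models` helpers and the `inplace`, `reuseBuffers`, and `countBuffers` options.

```lua
local optnet = require 'optnet'
local models = require 'optnet.models'

local net, input = models['alexnet']()

-- countBuffers=true also counts module state buffers (e.g. conv workspaces)
local countOpts = {countBuffers = true}
local before = optnet.countUsedMemory(net, input, countOpts)

-- inplace and reuseBuffers default to true; spelled out here for clarity
optnet.optimizeMemory(net, input, {inplace = true, reuseBuffers = true})
local after = optnet.countUsedMemory(net, input, countOpts)

print(('before: %.1f MB, after: %.1f MB'):format(
  before / 1024 / 1024, after / 1024 / 1024))
```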

countUsedMemory.lua (new file, +41)
@@ -0,0 +1,41 @@
+local optnet = require 'optnet.env'
+local utils = require 'optnet.utils'
+local keepTrack = utils.keepTrack
+
+function optnet.countUsedMemory(net, input, opts)
+  opts = opts or {}
+  local countBuffers = opts.countBuffers or false
+  local func = opts.func or 'updateOutput'
+  net[func](net, input)
+  local tensors = {}
+  local function entry_fun(t)
+    return t
+  end
+  local function new_func(m)
+    local basefunc = m[func]
+    m[func] = function(self, input)
+      --keepTrack(input, tensors, entry_fun)
+      keepTrack(self.output, tensors, entry_fun)
+      if countBuffers then
+        for k, v in pairs(self) do
+          if torch.isTensor(v) then
+            keepTrack(v, tensors, entry_fun)
+          end
+        end
+      end
+      return basefunc(self, input)
+    end
+  end
+  net:apply(new_func)
+  net[func](net, input)
+  -- clean up the modified function
+  net:apply(function(x)
+    x[func] = nil
+  end)
+  local total_size = 0
+  for k,v in pairs(tensors) do
+    local size = v:storage():size()*v:elementSize()
+    total_size = total_size + size
+  end
+  return total_size --/(1024*1024) -- MB
+end
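For a quick smoke test of the new function on a model outside `optnet.models`, a minimal sketch (the toy network and sizes are illustrative, not from the repo):

```lua
local optnet = require 'optnet'
require 'nn'

-- a tiny two-module network; countUsedMemory wraps each module's
-- updateOutput to record every distinct output (and state) tensor
local net = nn.Sequential()
  :add(nn.SpatialConvolution(3, 16, 3, 3))
  :add(nn.ReLU())
local input = torch.randn(1, 3, 32, 32)

-- with countBuffers=true, state tensors such as finput are counted too
local bytes = optnet.countUsedMemory(net, input, {countBuffers = true})
print(bytes / 1024 .. ' kB')
```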

env.lua (new file, +2)
@@ -0,0 +1,2 @@
+local optnet = {}
+return optnet
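This two-line module gives `init.lua` and `countUsedMemory.lua` a shared namespace table without a circular `require`: Lua caches a module's return value in `package.loaded`, so every `require 'optnet.env'` yields the same table. A sketch of the pattern (the added field is hypothetical, for illustration only):

```lua
-- any optnet file can extend the shared table:
local optnet = require 'optnet.env'
optnet.someHelper = function() end  -- hypothetical field

-- elsewhere, the same cached table is returned:
assert(require('optnet.env').someHelper ~= nil)
```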

init.lua (+96 -13)
@@ -1,8 +1,9 @@
 require 'nn'
 
-local utils = require 'optnet.utils'
+local optnet = require 'optnet.env'
+require 'optnet.countUsedMemory'
 
-local optnet = {}
+local utils = require 'optnet.utils'
 
 local kNotUsed = 10000---1
 local kNotDefined = 0
@@ -11,13 +12,6 @@ local kAlwaysLive = 10000
 
 local function analyse(net, input, func)
   local func = func or 'updateOutput'
-  local grad
-  if func == 'backward' then
-    -- need to run forward before backward
-    grad = net['forward'](net, input)
-  end
-  -- do a pass over the network to initialize its fields
-  net[func](net, input, grad)
 
   local track = {}
   local analysis = {}
@@ -81,6 +75,17 @@ local function analyse(net, input, func)
   net:apply(function(x)
     x[func] = nil
   end)
+
+  -- disable backward pass if in evaluation mode
+  if func == 'updateOutput' then
+    net:apply(function(m)
+      m.updateGradInput = function(self, input, gradInput)
+        error([[Backward pass disabled!
+You are using inference optimization.
+Call optnet.removeOptimization(net) to enable backward again]])
+      end
+    end)
+  end
   return analysis
 end
 
@@ -131,13 +136,79 @@ local function applyAssignments(net, assignments)
   end
 end
 
+local function defaultValue(var, val)
+  if var == nil then
+    var = val
+  end
+  return var
+end
+
+-- set inplace on the modules that allow it
+local function setInplace(net, opts)
+  local inplace = defaultValue(opts.inplace, true)
+
+  if inplace then
+    net:apply(function(m)
+      if m.inplace ~= nil then
+        -- inplace is not always supported for threshold,
+        -- depending on the values. Disabling it for the moment
+        if torch.typename(m) ~= 'nn.Threshold' then
+          m.inplace = true
+        end
+      end
+    end)
+  end
+end
+
+local reusableBuffers = {
+  ['nn.SpatialConvolution'] = {{'finput','fgradInput'},{}},
+  ['nn.SpatialConvolutionMM'] = {{'finput','fgradInput'},{}},
+  ['nn.Normalize'] = {{'norm','buffer','normp','_indices'},{}},
+  ['nn.SpatialCrossMapLRN'] = {{'scale'},{}},
+  ['nn.SpatialMaxPooling'] = {{'indices'},{}},
+}
+-- basic reusing scheme: keeps a list of all possible buffers
+-- that can be reused in evaluation mode and also in training
+-- mode.
+local function reuseStateBuffers(net, opts)
+  local reuseBuffers = defaultValue(opts.reuseBuffers, true)
+  if reuseBuffers then
+    local reusedBuffers = {}
+    net:apply(function(m)
+      local name = torch.typename(m)
+      if reusableBuffers[name] then
+        local rb = reusableBuffers[name][1]
+        for k, v in ipairs(rb) do
+          if m[v] then
+            reusedBuffers[name..','..v] = reusedBuffers[name..','..v] or m[v]:storage()
+            if reusedBuffers[name..','..v] then
+              m[v]:set(reusedBuffers[name..','..v])
+            end
+          end
+        end
+      end
+    end)
+  end
+end
+
 function optnet.optimizeMemory(net, input, opts)
+  opts = opts or {}
+  local func = defaultValue(opts.func,'forward')
+
+  local grad
+  if func == 'backward' then
+    -- need to run forward before backward
+    grad = net['forward'](net, input)
+  end
+  -- do a pass over the network to initialize its fields
+  net[func](net, input, grad)
+
+  setInplace(net, opts)
+  reuseStateBuffers(net, opts)
+
+  -- share outputs
   local analysis = analyse(net, input)
-  -- print('Analysis')
-  -- print(analysis)
   local assignments = assign(net,analysis)
-  -- print('Assignments')
-  -- print(assignments)
   applyAssignments(net, assignments)
 end
 
@@ -156,6 +227,18 @@ function optnet.removeOptimization(net)
   net:apply(function(m)
     rem(m.output)
     rem(m.gradInput)
+    local name = torch.typename(m)
+    if reusableBuffers[name] then
+      local rb = reusableBuffers[name][1]
+      for k, v in ipairs(rb) do
+        if m[v] then
+          m[v]:set()
+        end
+      end
+    end
+
+    -- remove backward blocking
+    m.updateGradInput = nil
   end)
 end
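The interplay of the new eval-mode guard and `removeOptimization` can be seen in a short sketch (the toy model is illustrative; the error text comes from the diff above):

```lua
local optnet = require 'optnet'
require 'nn'

local net = nn.Sequential():add(nn.Linear(10, 10)):add(nn.ReLU())
local input = torch.randn(1, 10)

optnet.optimizeMemory(net, input)  -- opts.func defaults to 'forward'

local out = net:forward(input)
-- every module's updateGradInput now raises "Backward pass disabled!"
local ok, err = pcall(net.backward, net, input, out:clone())
print(ok, err)  -- false, plus the error message

optnet.removeOptimization(net)     -- unshares buffers, clears the guard
out = net:forward(input)
net:backward(input, out:clone())   -- backward works again
```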
