
Commit 000f667

add checkpoint function, fixes HIPS#182 (thanks @j-towns)
1 parent e3101c7 commit 000f667

3 files changed (+57, -3 lines)


Diff for: autograd/__init__.py

+2 -1

@@ -3,4 +3,5 @@
 from . import container_types
 from .convenience_wrappers import (grad, multigrad, multigrad_dict, elementwise_grad,
                                    value_and_grad, grad_and_aux, hessian_vector_product,
-                                   hessian, jacobian, vector_jacobian_product, grad_named)
+                                   hessian, jacobian, vector_jacobian_product, grad_named,
+                                   checkpoint)

Diff for: autograd/convenience_wrappers.py

+13 -1

@@ -2,7 +2,7 @@
 from __future__ import absolute_import
 from functools import partial
 import autograd.numpy as np
-from autograd.core import make_vjp, getval, isnode, vspace
+from autograd.core import make_vjp, getval, isnode, vspace, primitive
 from .errors import add_error_hints
 from collections import OrderedDict
 from inspect import getargspec
@@ -180,6 +180,18 @@ def gradfun(*args, **kwargs):
 
     return gradfun
 
+def checkpoint(fun):
+    """Returns a checkpointed version of `fun`, where intermediate values
+    computed during the forward pass of `fun` are discarded and then recomputed
+    for the backward pass. Useful to save memory, effectively trading off time
+    and memory. See e.g. arxiv.org/abs/1604.06174.
+    """
+    def wrapped_grad(argnum, g, ans, vs, gvs, args, kwargs):
+        return make_vjp(fun, argnum)(*args, **kwargs)[0](g)
+    wrapped = primitive(fun)
+    wrapped.vjp = wrapped_grad
+    return wrapped
+
 def attach_name_and_doc(fun, argnum, opname):
     namestr = "{op}_{fun}_wrt_argnum_{argnum}".format(
         op=opname.lower(), fun=getattr(fun, '__name__', '[unknown name]'), argnum=argnum)
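
Usage note (not part of this diff): a minimal sketch of how the new checkpoint wrapper might be applied, mirroring the pattern in the test added below. The names block and loss are illustrative, not from the commit; the intent is that only the inputs and outputs of the wrapped function are kept during the forward pass, with its internals recomputed when the backward pass needs them.

# Minimal sketch (illustrative names; assumes this commit is installed).
import autograd.numpy as np
from autograd import grad, checkpoint

def block(x):
    # an intermediate-heavy sub-computation
    for _ in range(10):
        x = np.sin(x**2 + 1)
    return x

# Wrapping with checkpoint discards block's intermediates after the
# forward pass; they are recomputed when the backward pass reaches it.
checkpointed_block = checkpoint(block)

def loss(x):
    for _ in range(5):
        x = checkpointed_block(x)
    return np.sum(x)

# The gradient matches the un-checkpointed version, at lower peak memory.
print(grad(loss)(np.linspace(0., 1., 5)))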

Diff for: tests/test_wrappers.py

+42 -1

@@ -6,7 +6,7 @@
 from autograd.util import *
 from autograd import (grad, elementwise_grad, jacobian, value_and_grad,
                       grad_and_aux, hessian_vector_product, hessian, multigrad,
-                      jacobian, vector_jacobian_product, primitive)
+                      jacobian, vector_jacobian_product, primitive, checkpoint)
 from builtins import range
 
 npr.seed(1)
@@ -161,3 +161,44 @@ def f(x):
 
     y = np.random.randn(10, 10).astype(np.float16)
     assert grad(f)(y).dtype.type is np.float16
+
+def test_checkpoint_correctness():
+    bar = lambda x, y: 2*x + y + 5
+    checkpointed_bar = checkpoint(bar)
+    foo = lambda x: bar(x, x/3.) + bar(x, x**2)
+    foo2 = lambda x: checkpointed_bar(x, x/3.) + checkpointed_bar(x, x**2)
+    assert np.allclose(foo(3.), foo2(3.))
+    assert np.allclose(grad(foo)(3.), grad(foo2)(3.))
+
+    baz = lambda *args: sum(args)
+    checkpointed_baz = checkpoint(baz)
+    foobaz = lambda x: baz(x, x/3.)
+    foobaz2 = lambda x: checkpointed_baz(x, x/3.)
+    assert np.allclose(foobaz(3.), foobaz2(3.))
+    assert np.allclose(grad(foobaz)(3.), grad(foobaz2)(3.))
+
+def checkpoint_memory():
+    '''This test is meant to be run manually, since it depends on
+    memory_profiler and its behavior may vary.'''
+    try:
+        from memory_profiler import memory_usage
+    except ImportError:
+        return
+
+    def f(a):
+        for _ in range(10):
+            a = np.sin(a**2 + 1)
+        return a
+    checkpointed_f = checkpoint(f)
+
+    def testfun(f, x):
+        for _ in range(5):
+            x = f(x)
+        return np.sum(x)
+    gradfun = grad(testfun, 1)
+
+    A = npr.RandomState(0).randn(100000)
+    max_usage = max(memory_usage((gradfun, (f, A))))
+    max_checkpointed_usage = max(memory_usage((gradfun, (checkpointed_f, A))))
+
+    assert max_checkpointed_usage < max_usage / 2.
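
Note (assumption, not from the commit): checkpoint_memory is deliberately not named test_*, so the default test runner should skip it, consistent with its docstring about running it manually. One hypothetical way to invoke it, with memory_profiler installed:

# run from the repository root (path and invocation are assumptions)
#   pip install memory_profiler
#   python -c "from tests.test_wrappers import checkpoint_memory; checkpoint_memory()"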
