
Commit 9b1a65b

mruberry authored and facebook-github-bot committed
Extends type and shape tracing with device (pytorch#9796)
Summary: This PR extends the existing type and shape metadata tracing and verification done in autograd with device information. This expansion of tracing is required for pytorch#8354, is likely useful in other scenarios, and is a healthy sanity check, just like type and shape tracing.

The precise changes are:

- TypeAndShape -> InputMetadata, which now includes device()
- Creating InputMetadata is simplified to require just a tensor, and callers were updated to use this simpler invocation wherever possible
- The gradient accumulator of a variable is now reset when set_data() is called if either the type or device changes, and this reset now locks to avoid contention with acquiring the gradient accumulator
- Mismatched devices during backward() will throw a runtime error, just like mismatched type and shape
- (Bonus!) Two uninitialized pointers in THCReduce are now initialized (to nullptr) to prevent build warnings

fyi colesbury

Pull Request resolved: pytorch#9796
Reviewed By: goldsborough
Differential Revision: D9119325
Pulled By: ezyang
fbshipit-source-id: 76d1861b8d4f74db0575ff1f3bd965e18f9463de
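Editor's note: the new device check in validate_outputs (torch/csrc/autograd/engine.cpp below) matters when a gradient has the right type but the wrong CUDA device index. A minimal Python sketch of that situation follows; it is not part of this commit, assumes a build with at least two GPUs, and uses a deliberately broken custom Function for illustration only.

    import torch
    from torch.autograd import Function

    class BadDeviceGrad(Function):
        """Hypothetical op whose backward (incorrectly) moves the gradient to another GPU."""

        @staticmethod
        def forward(ctx, x):
            return x.clone()

        @staticmethod
        def backward(ctx, grad_output):
            # Same dtype and backend, wrong device index on purpose.
            return grad_output.cuda(1)

    x = torch.randn(3, device="cuda:0", requires_grad=True)
    loss = BadDeviceGrad.apply(x).sum()
    try:
        loss.backward()
    except RuntimeError as e:
        # With this change the engine is expected to report something like
        # "invalid gradient at index 0 - expected device 0 but got 1".
        print(e)

Before this commit, only the type and shape of the gradient were validated, so a mismatch like this could propagate silently.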
1 parent 2993c42 commit 9b1a65b

12 files changed, +95 -56 lines

aten/src/THC/THCReduce.cuh (+3 -3)

@@ -517,9 +517,9 @@ bool THC_reduceDim(THCState* state,
         (TYPE) outElements, init, modifyOp, reduceOp, finalizeOp); \
     } \
     else \
-    { \
-      void* stagingData; \
-      void* semaphores; \
+    { \
+      void* stagingData = nullptr; \
+      void* semaphores = nullptr; \
                                    \
       if(grid.y > 1) \
       { \

tools/autograd/templates/VariableType.cpp (+2 -2)

@@ -343,7 +343,7 @@ static void throw_error_out_requires_grad(const char* name) {
 
 static void rebase_history(Variable& var, std::shared_ptr<Function> grad_fn) {
   if (grad_fn && var.defined()) {
-    grad_fn->add_input_metadata(var.type(), var.sizes());
+    grad_fn->add_input_metadata(var);
     var.rebase_history({std::move(grad_fn), 0});
   }
 }
@@ -353,7 +353,7 @@ static void rebase_history(ArrayRef<Variable> vars, std::shared_ptr<Function> gr
   for (auto& var : vars) {
     if (var.defined()) {
       // TODO: eliminate const_cast
-      auto output_nr = grad_fn->add_input_metadata(var.type(), var.sizes());
+      auto output_nr = grad_fn->add_input_metadata(var);
       const_cast<Variable&>(var).rebase_history({grad_fn, output_nr});
     } else {
       grad_fn->add_input_metadata(Function::undefined_input());

torch/csrc/autograd/engine.cpp (+7)

@@ -338,6 +338,13 @@ static void validate_outputs(const edge_list& edges, variable_list& grads, const
       ss << metadata.type() << " but got " << grads[i].type();
       throw std::runtime_error(format_error(ss.str()));
     }
+    const auto output_device = output.is_cuda() ? output.get_device() : -1;
+    if (output_device != metadata.device()) {
+      std::stringstream ss;
+      ss << "invalid gradient at index " << i << " - expected device ";
+      ss << metadata.device() << " but got " << output_device;
+      throw std::runtime_error(format_error(ss.str()));
+    }
   }
 }

torch/csrc/autograd/function.h (+15 -6)

@@ -5,7 +5,7 @@
 #include "torch/csrc/autograd/anomaly_mode.h"
 #include "torch/csrc/autograd/profiler.h"
 #include "torch/csrc/autograd/saved_variable.h"
-#include "torch/csrc/autograd/type_and_shape.h"
+#include "torch/csrc/autograd/input_metadata.h"
 #include "torch/csrc/autograd/variable.h"
 #include "torch/csrc/utils/python_stub.h"
 #include "torch/csrc/utils/variadic.h"
@@ -128,9 +128,18 @@ struct TORCH_API Function : std::enable_shared_from_this<Function> {
 
   /// Adds the type and shape metadata for a new input. Returns the index of
   /// of the new input.
-  uint32_t add_input_metadata(const at::Type& type, at::IntList shape) noexcept {
+  uint32_t add_input_metadata(
+    const at::Type& type
+  , at::IntList shape
+  , const int64_t device) noexcept {
     uint32_t input_nr = input_metadata_.size();
-    input_metadata_.emplace_back(type, shape);
+    input_metadata_.emplace_back(type, shape, device);
+    return input_nr;
+  }
+
+  uint32_t add_input_metadata(const at::Tensor& t) noexcept {
+    uint32_t input_nr = input_metadata_.size();
+    input_metadata_.emplace_back(t);
     return input_nr;
   }
 
@@ -145,7 +154,7 @@ struct TORCH_API Function : std::enable_shared_from_this<Function> {
     return input_metadata_.size();
   }
 
-  const TypeAndShape& input_metadata(size_t index) const {
+  const InputMetadata& input_metadata(size_t index) const {
     return input_metadata_[index];
   }
 
@@ -322,7 +331,7 @@ struct TORCH_API Function : std::enable_shared_from_this<Function> {
   std::unique_ptr<AnomalyMetadata> anomaly_metadata_ = nullptr;
   std::vector<std::unique_ptr<FunctionPreHook>> pre_hooks_;
   std::vector<std::unique_ptr<FunctionPostHook>> post_hooks_;
-  at::SmallVector<TypeAndShape, 2> input_metadata_;
+  at::SmallVector<InputMetadata, 2> input_metadata_;
 };
 
 /// See Function::is_traceable() for definition.
@@ -367,7 +376,7 @@ inline void create_gradient_edge(
     Variable& variable,
     std::shared_ptr<Function> function) {
   // Copy before move.
-  const auto input_nr = function->add_input_metadata(variable.type(), variable.sizes());
+  const auto input_nr = function->add_input_metadata(variable);
   variable.set_gradient_edge({std::move(function), input_nr});
 }

torch/csrc/autograd/functions/accumulate_grad.cpp (+1 -1)

@@ -19,7 +19,7 @@ namespace torch { namespace autograd {
 AccumulateGrad::AccumulateGrad(Variable variable_)
     : Function(/*sequence_nr=*/UINT64_MAX)
     , variable(std::move(variable_)) {
-  add_input_metadata(variable.type(), variable.sizes());
+  add_input_metadata(variable);
 }
 
 auto AccumulateGrad::apply(variable_list&& grads) -> variable_list {

torch/csrc/autograd/functions/tensor.cpp (+1 -1)

@@ -43,7 +43,7 @@ CopySlices::CopySlices(
     fn(std::move(fn_)) {
   // Take the next_edges of fn as our own, except for index 0 which goes
   // to base instead of the view.
-  add_input_metadata(base_var.type(), base_var.sizes());
+  add_input_metadata(base_var);
   const auto num_outputs = fn->num_outputs();
   next_edges_.reserve(num_outputs);
   add_next_edge(base_var.gradient_edge());

torch/csrc/autograd/functions/utils.h (+1 -1)

@@ -54,7 +54,7 @@ inline void set_history(
   if (grad_fn) {
     if (variable.defined()) {
       auto output_nr =
-          grad_fn->add_input_metadata(variable.type(), variable.sizes());
+          grad_fn->add_input_metadata(variable);
       as_variable_ref(variable).set_gradient_edge({grad_fn, output_nr});
     } else {
       grad_fn->add_input_metadata(Function::undefined_input());

torch/csrc/autograd/input_metadata.h (+44, new file)

@@ -0,0 +1,44 @@
+#pragma once
+
+#include <ATen/ATen.h>
+
+#include <cstdint>
+
+namespace torch { namespace autograd {
+
+/// A tensor's type and shape. Each Function records the required type and
+/// shape of its inputs. If is_valid() is false, then the corresponding input
+/// is not used and may be an undefined tensor.
+struct InputMetadata {
+  InputMetadata() = default;
+
+  InputMetadata(const at::Type& type, at::IntList shape, const int64_t device)
+  : type_{&type} , shape_{shape}, device_{device} { }
+
+  InputMetadata(const at::Tensor& t)
+  : InputMetadata(t.type(), t.sizes(), t.is_cuda() ? t.get_device() : -1) { }
+
+  bool is_valid() const {
+    return type_ != nullptr;
+  }
+
+  const at::Type& type() const {
+    AT_ASSERT(type_);
+    return *type_;
+  }
+
+  at::IntList shape() const {
+    return shape_;
+  }
+
+  int64_t device() const {
+    return device_;
+  }
+
+private:
+  const at::Type* type_ = nullptr;
+  at::DimVector shape_;
+  const int64_t device_ = -1;
+};
+
+}}
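Editor's note: InputMetadata stores the device as a bare int64_t, using the CUDA device index for CUDA tensors and -1 for CPU tensors (see the Tensor constructor above). An illustrative Python snippet mirroring that convention; it is not part of the commit, and the CUDA line assumes a CUDA-capable build.

    import torch

    def device_index(t):
        # Mirrors InputMetadata's convention: CUDA device index, or -1 for CPU.
        return t.get_device() if t.is_cuda else -1

    print(device_index(torch.zeros(2)))                   # -1 (CPU)
    print(device_index(torch.zeros(2, device="cuda:0")))  # 0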

torch/csrc/autograd/python_function.cpp (+1 -1)

@@ -433,7 +433,7 @@ static void _wrap_outputs(THPFunction *self,
     // to set_history wins.
     auto var = as_variable(obj, i);
     if (cdata) {
-      auto output_nr = cdata->add_input_metadata(var.type(), var.sizes());
+      auto output_nr = cdata->add_input_metadata(var);
       AT_ASSERT(i == (int)output_nr);
     }
     set_history(var, i, is_input, is_modified, is_differentiable);

torch/csrc/autograd/python_legacy_variable.cpp (+1 -1)

@@ -57,7 +57,7 @@ static PyObject *THPVariable_pynew(PyTypeObject* type, PyObject *args, PyObject
   Variable var;
   if (grad_fn) {
     auto grad_fn_ = THPFunction_asFunction((THPFunction*)grad_fn);
-    Edge edge(grad_fn_, grad_fn_->add_input_metadata(tensor.type(), tensor.sizes()));
+    Edge edge(grad_fn_, grad_fn_->add_input_metadata(tensor));
     var = make_variable(std::move(tensor), std::move(edge));
   } else {
     var = make_variable(std::move(tensor), requires_grad);

torch/csrc/autograd/type_and_shape.h (-33, deleted file)

@@ -1,33 +0,0 @@
-#pragma once
-
-#include <ATen/ATen.h>
-
-namespace torch { namespace autograd {
-
-/// A tensor's type and shape. Each Function records the required type and
-/// shape of its inputs. If is_valid() is false, then the corresponding input
-/// is not used and may be an undefined tensor.
-struct TypeAndShape {
-  TypeAndShape() : type_(nullptr) {}
-
-  TypeAndShape(const at::Type& type, at::IntList shape)
-  : type_(&type) , shape_(shape) {}
-
-  bool is_valid() const {
-    return type_ != nullptr;
-  }
-
-  const at::Type& type() const {
-    AT_ASSERT(type_);
-    return *type_;
-  }
-
-  at::IntList shape() const {
-    return shape_;
-  }
-
-  const at::Type* type_;
-  at::DimVector shape_;
-};
-
-}}

torch/csrc/autograd/variable.cpp (+19 -7)

@@ -117,13 +117,22 @@ void Variable::Impl::backward(
 }
 
 void Variable::Impl::set_data(Tensor new_data) {
-  if (new_data.type() != data_.type()) {
-    scalar_type_ = new_data.type().scalarType();
-    backend_ = new_data.type().backend();
-    is_variable_ = true;
-    // Clear grad_accumulator if it exists, since it stores the old type info.
-    grad_accumulator_.reset();
+  // Resets gradient accumulator if metadata is out of date
+  std::lock_guard<std::mutex> lock(mutex_);
+  auto prior_accumulator = grad_accumulator_.lock();
+  if (prior_accumulator) {
+    const auto prior_device = prior_accumulator->input_metadata(0).device();
+    const auto new_device = new_data.is_cuda() ? new_data.get_device() : -1;
+
+    if (new_data.type() != data_.type() || prior_device != new_device) {
+      grad_accumulator_.reset();
+    }
   }
+
+  // Updates metadata
+  scalar_type_ = new_data.type().scalarType();
+  backend_ = new_data.type().backend();
+  is_variable_ = true;
   data_ = std::move(new_data);
 }
 
@@ -160,7 +169,10 @@ std::shared_ptr<Function>& Variable::ViewImpl::get_grad_fn() {
     fn->stride = strides().vec();
     fn->storage_offset = data_.storage_offset();
     fn->set_next_edges(collect_next_edges(base_));
-    fn->add_input_metadata(base_.type(), sizes());
+    fn->add_input_metadata(
+      base_.type()
+    , sizes() // Note: sizes(), not base_.sizes(), is intentional
+    , base_.is_cuda() ? base_.get_device() : -1);
     grad_fn_ = std::move(fn);
     attr_version = current_version;
   }
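Editor's note: Variable::Impl::set_data is what backs assignment to .data on the Python side. A rough Python-level sketch of the case the new logic covers, where the at::Type is unchanged but the device index changes; this is not from the commit, it assumes a build with at least two GPUs, and the accumulator reset itself happens internally rather than being directly observable here.

    import torch

    # Leaf variable on cuda:0; backward() materializes its gradient accumulator.
    w = torch.randn(4, device="cuda:0", requires_grad=True)
    w.sum().backward()
    print(w.grad.device)  # cuda:0

    # Same dtype and backend (CUDA float), different device index. Before this
    # commit the accumulator was only cleared on a *type* change, so its recorded
    # metadata could go stale; now a device change clears it too, under a lock.
    w.data = torch.randn(4, device="cuda:1")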
