Skip to content

Commit 9477c14

Browse files
jaliyae authored and facebook-github-bot committed
C++ Frontend: adding two distributed samples (Random and Sequential) (pytorch#16910)
Summary: Adding two distributed samplers, Random and Sequential, to the mix. Similar to the Python counterpart, DistributedSampler introduces a new method `set_epoch(size_t epoch)` which can be used to shuffle data deterministically between distributed processes. Pull Request resolved: pytorch#16910 Differential Revision: D14130980 Pulled By: soumith fbshipit-source-id: ec08b7130c01e2fc6dc3693f7ac622a0a6d60f10
1 parent 8852e21 commit 9477c14

File tree

5 files changed

+484
-0
lines changed

5 files changed

+484
-0
lines changed

test/cpp/api/dataloader.cpp

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,188 @@ TEST(DataTest, CanUseCustomTypeAsIndexType) {
831831
}
832832
}
833833

834+
// Verifies that a single-replica DistributedRandomSampler yields every index
// in [0, sample_count) exactly once, regardless of shuffle order.
TEST(DataTest, DistributedRandomSamplerSingleReplicaProduceCorrectSamples) {
  const size_t sample_count = 10;
  samplers::DistributedRandomSampler sampler(sample_count);

  // Drain the sampler in batches of three until it is exhausted.
  std::vector<size_t> drawn;
  for (auto batch = sampler.next(3); batch.has_value();
       batch = sampler.next(3)) {
    drawn.insert(drawn.end(), batch->begin(), batch->end());
  }

  ASSERT_EQ(drawn.size(), sample_count);

  // After sorting, the samples must be exactly 0, 1, ..., sample_count - 1.
  std::sort(drawn.begin(), drawn.end());
  for (size_t expected = 0; expected < drawn.size(); ++expected) {
    ASSERT_EQ(drawn[expected], expected);
  }
}
851+
852+
// Verifies that several DistributedRandomSampler replicas together cover the
// whole dataset, both with and without duplicated indices at the boundary.
TEST(DataTest, DistributedRandomSamplerMultiReplicaProduceCorrectSamples) {
  const size_t sample_count = 10;
  const size_t num_replicas = 3;

  // Drains one sampler per replica and checks the merged, sorted indices
  // against the expected sequence.
  auto check_replicas = [&](bool allow_duplicates,
                            size_t local_sample_count,
                            std::vector<size_t>& expected,
                            size_t batch_size) {
    std::vector<std::unique_ptr<samplers::DistributedRandomSampler>> replicas;
    for (size_t rank = 0; rank < num_replicas; ++rank) {
      replicas.emplace_back(
          torch::make_unique<samplers::DistributedRandomSampler>(
              sample_count, num_replicas, rank, allow_duplicates));
    }

    std::vector<size_t> merged;
    for (size_t rank = 0; rank < num_replicas; ++rank) {
      replicas[rank]->reset();
      torch::optional<std::vector<size_t>> batch;
      while ((batch = replicas[rank]->next(batch_size)).has_value()) {
        merged.insert(merged.end(), batch->begin(), batch->end());
      }
      // Each replica must contribute exactly its local share of the samples.
      ASSERT_EQ(merged.size(), local_sample_count * (rank + 1));
    }
    std::sort(merged.begin(), merged.end());
    ASSERT_EQ(merged, expected);
  };

  for (size_t batch_size = 1; batch_size <= 3; ++batch_size) {
    // allow_duplicates = true: each replica draws ceil(10 / 3) = 4 samples,
    // so two indices appear twice in the merged output.
    size_t local_sample_count =
        static_cast<size_t>(std::ceil(sample_count * 1.0 / num_replicas));
    std::vector<size_t> with_duplicates{0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    check_replicas(true, local_sample_count, with_duplicates, batch_size);

    // allow_duplicates = false: each replica draws floor(10 / 3) = 3 samples,
    // so one index is dropped from the merged output.
    local_sample_count =
        static_cast<size_t>(std::floor(sample_count * 1.0 / num_replicas));
    std::vector<size_t> without_duplicates{0, 1, 2, 3, 4, 5, 6, 7, 8};
    check_replicas(false, local_sample_count, without_duplicates, batch_size);
  }
}
893+
894+
// Checks that DistributedRandomSampler serialization round-trips both the
// current index and the epoch.
TEST(DataTest, CanSaveAndLoadDistributedRandomSampler) {
  {
    // Fresh sampler: index is zero before and after the round trip.
    samplers::DistributedRandomSampler original(10);
    ASSERT_EQ(original.index(), 0);
    std::stringstream stream;
    torch::save(original, stream);

    samplers::DistributedRandomSampler restored(10);
    torch::load(restored, stream);
    ASSERT_EQ(restored.index(), 0);
  }
  {
    // Partially-consumed sampler: the index (3 + 4 = 7) survives the
    // round trip.
    samplers::DistributedRandomSampler original(10);
    original.next(3);
    original.next(4);
    ASSERT_EQ(original.index(), 7);
    std::stringstream stream;
    torch::save(original, stream);

    samplers::DistributedRandomSampler restored(10);
    torch::load(restored, stream);
    ASSERT_EQ(restored.index(), 7);
  }
  {
    // The epoch set via set_epoch() is serialized as well.
    samplers::DistributedRandomSampler original(10);
    original.set_epoch(3);
    std::stringstream stream;
    torch::save(original, stream);

    samplers::DistributedRandomSampler restored(10);
    torch::load(restored, stream);
    ASSERT_EQ(restored.epoch(), 3);
  }
}
928+
929+
// Verifies that a single-replica DistributedSequentialSampler yields every
// index in [0, sample_count) exactly once.
TEST(DataTest, DistributedSequentialSamplerSingleReplicaProduceCorrectSamples) {
  const size_t sample_count = 10;
  const size_t batch_size = 3;
  samplers::DistributedSequentialSampler sampler(sample_count);

  // Drain the sampler batch by batch until it is exhausted.
  std::vector<size_t> drawn;
  for (auto batch = sampler.next(batch_size); batch.has_value();
       batch = sampler.next(batch_size)) {
    drawn.insert(drawn.end(), batch->begin(), batch->end());
  }

  ASSERT_EQ(drawn.size(), sample_count);

  // After sorting, the samples must be exactly 0, 1, ..., sample_count - 1.
  std::sort(drawn.begin(), drawn.end());
  for (size_t expected = 0; expected < drawn.size(); ++expected) {
    ASSERT_EQ(drawn[expected], expected);
  }
}
947+
948+
// Verifies that several DistributedSequentialSampler replicas together cover
// the whole dataset, both with and without duplicated boundary indices.
TEST(DataTest, DistributedSequentialSamplerMultiReplicaProduceCorrectSamples) {
  const size_t sample_count = 10;
  const size_t num_replicas = 3;

  // Drains one sampler per replica and checks the merged, sorted indices
  // against the expected sequence.
  auto check_replicas = [&](bool allow_duplicates,
                            size_t local_sample_count,
                            std::vector<size_t>& expected,
                            size_t batch_size) {
    std::vector<std::unique_ptr<samplers::DistributedSequentialSampler>>
        replicas;
    for (size_t rank = 0; rank < num_replicas; ++rank) {
      replicas.emplace_back(
          torch::make_unique<samplers::DistributedSequentialSampler>(
              sample_count, num_replicas, rank, allow_duplicates));
    }

    std::vector<size_t> merged;
    for (size_t rank = 0; rank < num_replicas; ++rank) {
      replicas[rank]->reset();
      torch::optional<std::vector<size_t>> batch;
      while ((batch = replicas[rank]->next(batch_size)).has_value()) {
        merged.insert(merged.end(), batch->begin(), batch->end());
      }
      // Each replica must contribute exactly its local share of the samples.
      ASSERT_EQ(merged.size(), local_sample_count * (rank + 1));
    }
    std::sort(merged.begin(), merged.end());
    ASSERT_EQ(merged, expected);
  };

  for (size_t batch_size = 1; batch_size <= 3; ++batch_size) {
    // allow_duplicates = true: each replica draws ceil(10 / 3) = 4 samples,
    // so two indices appear twice in the merged output.
    size_t local_sample_count =
        static_cast<size_t>(std::ceil(sample_count * 1.0 / num_replicas));
    std::vector<size_t> with_duplicates{0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    check_replicas(true, local_sample_count, with_duplicates, batch_size);

    // allow_duplicates = false: each replica draws floor(10 / 3) = 3 samples,
    // so one index is dropped from the merged output.
    local_sample_count =
        static_cast<size_t>(std::floor(sample_count * 1.0 / num_replicas));
    std::vector<size_t> without_duplicates{0, 1, 2, 3, 4, 5, 6, 7, 8};
    check_replicas(false, local_sample_count, without_duplicates, batch_size);
  }
}
990+
991+
// Checks that DistributedSequentialSampler serialization round-trips the
// current index.
TEST(DataTest, CanSaveAndLoadDistributedSequentialSampler) {
  {
    // Fresh sampler: index is zero before and after the round trip.
    samplers::DistributedSequentialSampler original(10);
    ASSERT_EQ(original.index(), 0);
    std::stringstream stream;
    torch::save(original, stream);

    samplers::DistributedSequentialSampler restored(10);
    torch::load(restored, stream);
    ASSERT_EQ(restored.index(), 0);
  }
  {
    // Partially-consumed sampler: the index (3 + 4 = 7) survives the
    // round trip.
    samplers::DistributedSequentialSampler original(10);
    original.next(3);
    original.next(4);
    ASSERT_EQ(original.index(), 7);
    std::stringstream stream;
    torch::save(original, stream);

    samplers::DistributedSequentialSampler restored(10);
    torch::load(restored, stream);
    ASSERT_EQ(restored.index(), 7);
  }
}
1015+
8341016
TEST(DataLoaderTest, DataLoaderOptionsDefaultAsExpected) {
8351017
DataLoaderOptions partial_options;
8361018
FullDataLoaderOptions full_options(partial_options);

torch/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ if (NOT NO_API)
225225
list(APPEND TORCH_SRCS
226226
${TORCH_SRC_DIR}/csrc/api/src/cuda.cpp
227227
${TORCH_SRC_DIR}/csrc/api/src/data/datasets/mnist.cpp
228+
${TORCH_SRC_DIR}/csrc/api/src/data/samplers/distributed.cpp
228229
${TORCH_SRC_DIR}/csrc/api/src/data/samplers/random.cpp
229230
${TORCH_SRC_DIR}/csrc/api/src/data/samplers/sequential.cpp
230231
${TORCH_SRC_DIR}/csrc/api/src/data/samplers/stream.cpp

torch/csrc/api/include/torch/data/samplers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <torch/data/samplers/base.h>
44
#include <torch/data/samplers/custom_batch_request.h>
5+
#include <torch/data/samplers/distributed.h>
56
#include <torch/data/samplers/random.h>
67
#include <torch/data/samplers/sequential.h>
78
#include <torch/data/samplers/serialize.h>
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#pragma once
2+
3+
#include <torch/csrc/WindowsTorchApiMacro.h>
4+
#include <torch/data/samplers/base.h>
5+
6+
#include <cstddef>
7+
#include <vector>
8+
9+
namespace torch {
10+
namespace serialize {
11+
class OutputArchive;
12+
class InputArchive;
13+
} // namespace serialize
14+
} // namespace torch
15+
16+
namespace torch {
17+
namespace data {
18+
namespace samplers {
19+
20+
/// A `Sampler` that selects a subset of indices to sample from and defines a
21+
/// sampling behavior. In a distributed setting, this selects a subset of the
22+
/// indices depending on the provided num_replicas and rank parameters. The
23+
/// `Sampler` performs a rounding operation based on the `allow_duplicates`
24+
/// parameter to decide the local sample count.
25+
template <typename BatchRequest = std::vector<size_t>>
26+
class DistributedSampler : public Sampler<BatchRequest> {
27+
public:
28+
TORCH_API DistributedSampler(
29+
size_t size,
30+
size_t num_replicas = 1,
31+
size_t rank = 0,
32+
bool allow_duplicates = true)
33+
: size_(size),
34+
num_replicas_(num_replicas),
35+
rank_(rank),
36+
epoch_(0),
37+
allow_duplicates_(allow_duplicates) {}
38+
39+
/// Set the epoch for the current enumeration. This can be used to alter the
40+
/// sample selection and shuffling behavior.
41+
TORCH_API void set_epoch(size_t epoch) {
42+
epoch_ = epoch;
43+
}
44+
45+
TORCH_API size_t epoch() const {
46+
return epoch_;
47+
}
48+
49+
protected:
50+
size_t local_sample_count() {
51+
if (allow_duplicates_) {
52+
return (size_ + num_replicas_ - 1) / num_replicas_;
53+
} else {
54+
return size_ / num_replicas_;
55+
}
56+
}
57+
58+
size_t size_;
59+
size_t num_replicas_;
60+
size_t rank_;
61+
size_t epoch_;
62+
bool allow_duplicates_;
63+
};
64+
65+
/// Select samples randomly. The sampling order is shuffled at each `reset()`
66+
/// call.
67+
class DistributedRandomSampler : public DistributedSampler<> {
68+
public:
69+
TORCH_API DistributedRandomSampler(
70+
size_t size,
71+
size_t num_replicas = 1,
72+
size_t rank = 0,
73+
bool allow_duplicates = true);
74+
75+
/// Resets the `DistributedRandomSampler` to a new set of indices.
76+
TORCH_API void reset(optional<size_t> new_size = nullopt) override;
77+
78+
/// Returns the next batch of indices.
79+
TORCH_API optional<std::vector<size_t>> next(size_t batch_size) override;
80+
81+
/// Serializes the `DistributedRandomSampler` to the `archive`.
82+
TORCH_API void save(serialize::OutputArchive& archive) const override;
83+
84+
/// Deserializes the `DistributedRandomSampler` from the `archive`.
85+
TORCH_API void load(serialize::InputArchive& archive) override;
86+
87+
/// Returns the current index of the `DistributedRandomSampler`.
88+
TORCH_API size_t index() const noexcept;
89+
90+
private:
91+
void populate_indices();
92+
93+
size_t begin_index_;
94+
size_t end_index_;
95+
size_t sample_index_;
96+
std::vector<size_t> all_indices_;
97+
};
98+
99+
/// Select samples sequentially.
100+
class DistributedSequentialSampler : public DistributedSampler<> {
101+
public:
102+
TORCH_API DistributedSequentialSampler(
103+
size_t size,
104+
size_t num_replicas = 1,
105+
size_t rank = 0,
106+
bool allow_duplicates = true);
107+
108+
/// Resets the `DistributedSequentialSampler` to a new set of indices.
109+
TORCH_API void reset(optional<size_t> new_size = nullopt) override;
110+
111+
/// Returns the next batch of indices.
112+
TORCH_API optional<std::vector<size_t>> next(size_t batch_size) override;
113+
114+
/// Serializes the `DistributedSequentialSampler` to the `archive`.
115+
TORCH_API void save(serialize::OutputArchive& archive) const override;
116+
117+
/// Deserializes the `DistributedSequentialSampler` from the `archive`.
118+
TORCH_API void load(serialize::InputArchive& archive) override;
119+
120+
/// Returns the current index of the `DistributedSequentialSampler`.
121+
TORCH_API size_t index() const noexcept;
122+
123+
private:
124+
void populate_indices();
125+
126+
size_t begin_index_;
127+
size_t end_index_;
128+
size_t sample_index_;
129+
std::vector<size_t> all_indices_;
130+
};
131+
132+
} // namespace samplers
133+
} // namespace data
134+
} // namespace torch

0 commit comments

Comments
 (0)