Fix segfault and add an end-to-end stepper check

1tnguyen · 1tnguyen · commit 646f4f7f79be · 2025-02-07T02:08:42.000Z
Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>
diff --git a/runtime/cudaq/cudm_state.h b/runtime/cudaq/cudm_state.h
@@ -72,6 +72,10 @@ class cudm_state {
   /// @return A copy of the raw data as a vector of complex numbers.
   std::vector<std::complex<double>> get_raw_data() const;
 
+  /// @brief Get the pointer to the device memory buffer storing the state.
+  /// @return GPU device pointer.
+  void *get_device_pointer() const;
+
   /// @brief Get a copy of the hilbert space dimensions for the quantum state.
   /// @return A copy of the hilbert space dimensions of a vector of integers.
   std::vector<int64_t> get_hilbert_space_dims() const;
diff --git a/runtime/cudaq/dynamics/cudm_helpers.cpp b/runtime/cudaq/dynamics/cudm_helpers.cpp
@@ -10,13 +10,13 @@
 #include "cudaq/cudm_error_handling.h"
 
 namespace cudaq {
-// Function to flatten a matrix into a 1D array
+// Function to flatten a matrix into a 1D array (column major)
 std::vector<std::complex<double>> flatten_matrix(const matrix_2 &matrix) {
   std::vector<std::complex<double>> flat_matrix;
-
-  for (size_t i = 0; i < matrix.get_rows(); i++) {
-    for (size_t j = 0; j < matrix.get_columns(); j++) {
-      flat_matrix.push_back(matrix[{i, j}]);
+  flat_matrix.reserve(matrix.get_size());
+  for (size_t col = 0; col < matrix.get_columns(); col++) {
+    for (size_t row = 0; row < matrix.get_rows(); row++) {
+      flat_matrix.push_back(matrix[{row, col}]);
     }
   }
 
@@ -53,19 +53,14 @@ cudensitymatElementaryOperator_t create_elementary_operator(
 
   cudensitymatElementaryOperator_t cudm_elem_op = nullptr;
 
-  std::vector<double> interleaved_matrix;
-  interleaved_matrix.reserve(flat_matrix.size() * 2);
-
-  for (const auto &value : flat_matrix) {
-    interleaved_matrix.push_back(value.real());
-    interleaved_matrix.push_back(value.imag());
-  }
+  // FIXME: leak (need to track this buffer somewhere and delete **after** the
+  // whole evolve)
+  auto *elementaryMat_d = create_array_gpu(flat_matrix);
 
   cudensitymatStatus_t status = cudensitymatCreateElementaryOperator(
       handle, static_cast<int32_t>(subspace_extents.size()),
       subspace_extents.data(), CUDENSITYMAT_OPERATOR_SPARSITY_NONE, 0, nullptr,
-      CUDA_C_64F, static_cast<void *>(interleaved_matrix.data()),
-      {nullptr, nullptr}, &cudm_elem_op);
+      CUDA_C_64F, elementaryMat_d, {nullptr, nullptr}, &cudm_elem_op);
 
   if (status != CUDENSITYMAT_STATUS_SUCCESS) {
     std::cerr << "Error: Failed to create elementary operator. Status: "
@@ -92,7 +87,7 @@ void append_elementary_operator_to_term(
   std::vector<cudensitymatElementaryOperator_t> elem_ops = {elem_op};
 
   std::vector<int32_t> modeActionDuality(degrees.size(), 0);
-
+  assert(elem_ops.size() == degrees.size());
   HANDLE_CUDM_ERROR(cudensitymatOperatorTermAppendElementaryProduct(
       handle, term, static_cast<int32_t>(degrees.size()), elem_ops.data(),
       degrees.data(), modeActionDuality.data(), make_cuDoubleComplex(1.0, 0.0),
@@ -233,13 +228,15 @@ cudensitymatOperator_t convert_to_cudensitymat_operator(
           handle, operator_handle, term, 0, make_cuDoubleComplex(1.0, 0.0),
           {nullptr, nullptr}));
 
+      // FIXME: leak
+      // We must track these handles and destroy **after** evolve finishes
       // Destroy the term
-      HANDLE_CUDM_ERROR(cudensitymatDestroyOperatorTerm(term));
+      // HANDLE_CUDM_ERROR(cudensitymatDestroyOperatorTerm(term));
 
-      // Cleanup
-      for (auto &elem_op : elementary_operators) {
-        HANDLE_CUDM_ERROR(cudensitymatDestroyElementaryOperator(elem_op));
-      }
+      // // Cleanup
+      // for (auto &elem_op : elementary_operators) {
+      //   HANDLE_CUDM_ERROR(cudensitymatDestroyElementaryOperator(elem_op));
+      // }
     }
 
     return operator_handle;
diff --git a/runtime/cudaq/dynamics/cudm_state.cpp b/runtime/cudaq/dynamics/cudm_state.cpp
@@ -120,6 +120,8 @@ std::vector<std::complex<double>> cudm_state::get_raw_data() const {
   return rawData_;
 }
 
+void *cudm_state::get_device_pointer() const { return gpuData_; }
+
 std::vector<int64_t> cudm_state::get_hilbert_space_dims() const {
   return hilbertSpaceDims_;
 }
diff --git a/unittests/dynamics/test_cudm_time_stepper.cpp b/unittests/dynamics/test_cudm_time_stepper.cpp
@@ -91,3 +91,28 @@ TEST_F(CuDensityMatTimeStepperTest, ComputeStepZeroStepSize) {
 TEST_F(CuDensityMatTimeStepperTest, ComputeStepLargeTimeValues) {
   EXPECT_NO_THROW(time_stepper_->compute(*state_, 1e6, 1e3));
 }
+
+TEST_F(CuDensityMatTimeStepperTest, ComputeStepCheckOutput) {
+  const std::vector<std::complex<double>> initialState = {
+      {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}};
+  const std::vector<int64_t> dims = {4};
+  auto inputState = std::make_unique<cudm_state>(handle_, initialState, dims);
+  auto op = cudaq::matrix_operator::create(0);
+  auto cudmOp = cudaq::convert_to_cudensitymat_operator<cudaq::matrix_operator>(
+      handle_, {}, op, dims); // Next: initialize the time stepper
+  auto time_stepper = std::make_unique<cudm_time_stepper>(handle_, cudmOp);
+  auto outputState = time_stepper->compute(*inputState, 0.0, 1.0);
+
+  std::vector<std::complex<double>> outputStateVec(4);
+  HANDLE_CUDA_ERROR(cudaMemcpy(
+      outputStateVec.data(), outputState.get_device_pointer(),
+      outputStateVec.size() * sizeof(std::complex<double>), cudaMemcpyDefault));
+  // The create operator moves the state up by one level: |0> -> |1>.
+  const std::vector<std::complex<double>> expectedOutputState = {
+      {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}};
+
+  for (std::size_t i = 0; i < expectedOutputState.size(); ++i) {
+    EXPECT_TRUE(std::abs(expectedOutputState[i] - outputStateVec[i]) < 1e-12);
+  }
+  HANDLE_CUDM_ERROR(cudensitymatDestroyOperator(cudmOp));
+}

Original file line number	Diff line number	Diff line change
`@@ -120,6 +120,8 @@ std::vector<std::complex<double>> cudm_state::get_raw_data() const {`
`120`	`120`	`return rawData_;`
`121`	`121`	`}`
`122`	`122`
	`123`	`+void *cudm_state::get_device_pointer() const { return gpuData_; }`
	`124`	`+`
`123`	`125`	`std::vector<int64_t> cudm_state::get_hilbert_space_dims() const {`
`124`	`126`	`return hilbertSpaceDims_;`
`125`	`127`	`}`