dftfeDevelopers
diff --git a/CMakeLists.txt
+37-26 b/CMakeLists.txt
+37-26
diff --git a/include/AtomicCenteredNonLocalOperatorKernelsDevice.h
+1-1 b/include/AtomicCenteredNonLocalOperatorKernelsDevice.h
+1-1
diff --git a/include/BLASWrapper.h
+4-1 b/include/BLASWrapper.h
+4-1
diff --git a/include/KohnShamHamiltonianOperator.h
+1-1 b/include/KohnShamHamiltonianOperator.h
+1-1
diff --git a/include/constraintMatrixInfoDeviceKernels.h
+55 b/include/constraintMatrixInfoDeviceKernels.h
+55
diff --git a/include/densityCalculator.h
+2-29 b/include/densityCalculator.h
+2-29
diff --git a/include/densityCalculatorDeviceKernels.h
+85 b/include/densityCalculatorDeviceKernels.h
+85
diff --git a/include/densityFirstOrderResponseCalculator.h
+2-28 b/include/densityFirstOrderResponseCalculator.h
+2-28
@@ -208,57 +208,68 @@ SET(TARGET_SRC
   ./src/pseudo/oncv/atomCenteredPostProcessing.cc)
 
 IF ("${GPU_LANG}" STREQUAL "cuda")
-
+SET(DEVICE_HOST_SRC
+  ./src/linAlg/linearAlgebraOperationsDevice.cc
+  ./utils/constraintMatrixInfoDevice.cc
+  ./src/dft/solveVselfInBinsDevice.cc
+  ./src/linAlg/pseudoGSDevice.cc
+  ./src/linAlg/rayleighRitzDevice.cc
+  ./src/poisson/poissonSolverProblemDevice.cc
+  ./src/helmholtz/kerkerSolverProblemDevice.cc
+  ./src/solvers/eigenSolvers/chebyshevOrthogonalizedSubspaceIterationSolverDevice.cc
+  ./src/solvers/linearSolverDevice.cc
+  ./src/solvers/linearSolverProblemDevice.cc
+  ./src/solvers/linearSolverCGDevice.cc
+)
 SET(DEVICE_SRC
   ./utils/MemoryTransferKernelsDevice.cc
   ./utils/DeviceKernelsGeneric.cc
   ./utils/DeviceDirectCCLWrapper.cc
   ./src/dft/densityCalculatorDeviceKernels.cc
   ./src/dft/densityFirstOrderResponseCalculatorDeviceKernels.cc
-  ./src/dft/solveVselfInBinsDevice.cc
+  ./src/dft/solveVselfInBinsDeviceKernels.cc
   ./src/dft/kineticEnergyDensityCalculatorDeviceKernels.cc
-  ./src/solvers/eigenSolvers/chebyshevOrthogonalizedSubspaceIterationSolverDevice.cc
-  ./src/linAlg/linearAlgebraOperationsDevice.cc
-  ./src/linAlg/pseudoGSDevice.cc
-  ./src/linAlg/rayleighRitzDevice.cc
+  ./src/linAlg/linearAlgebraOperationsDeviceKernels.cc
   ./src/force/forceWfcContractionsDeviceKernels.cc
-  ./utils/constraintMatrixInfoDevice.cc
+  ./utils/constraintMatrixInfoDeviceKernels.cc
   ./utils/DeviceAPICalls.cu.cc
   ./utils/BLASWrapperDevice.cu.cc
   ./utils/MPICommunicatorP2PKernelsDevice.cc
-  ./src/solvers/linearSolverDevice.cc
-  ./src/solvers/linearSolverCGDevice.cc
-  ./src/solvers/linearSolverProblemDevice.cc
-  ./src/poisson/poissonSolverProblemDevice.cc
-  ./src/helmholtz/kerkerSolverProblemDevice.cc
+  ./src/solvers/linearSolverCGDeviceKernels.cc
+  ./src/poisson/poissonSolverProblemDeviceKernels.cc
   ./utils/FEBasisOperationsKernelsInternalDevice.cc
   ./src/atom/AtomicCenteredNonLocalOperatorKernelsDevice.cc
   )
-
 ELSEIF ("${GPU_LANG}" STREQUAL "hip")
-
+SET(DEVICE_HOST_SRC
+  ./src/linAlg/linearAlgebraOperationsDevice.cc
+  ./utils/constraintMatrixInfoDevice.cc
+  ./src/dft/solveVselfInBinsDevice.cc
+  ./src/linAlg/pseudoGSDevice.cc
+  ./src/linAlg/rayleighRitzDevice.cc
+  ./src/poisson/poissonSolverProblemDevice.cc
+  ./src/helmholtz/kerkerSolverProblemDevice.cc
+  ./src/solvers/eigenSolvers/chebyshevOrthogonalizedSubspaceIterationSolverDevice.cc
+  ./src/solvers/linearSolverDevice.cc
+  ./src/solvers/linearSolverProblemDevice.cc
+  ./src/solvers/linearSolverCGDevice.cc
+)
 SET(DEVICE_SRC
   ./utils/MemoryTransferKernelsDevice.cc
   ./utils/DeviceKernelsGeneric.cc
   ./utils/DeviceDirectCCLWrapper.cc
   ./src/dft/densityCalculatorDeviceKernels.cc
   ./src/dft/densityFirstOrderResponseCalculatorDeviceKernels.cc
-  ./src/dft/solveVselfInBinsDevice.cc
+  ./src/dft/solveVselfInBinsDeviceKernels.cc
   ./src/dft/kineticEnergyDensityCalculatorDeviceKernels.cc
-  ./src/solvers/eigenSolvers/chebyshevOrthogonalizedSubspaceIterationSolverDevice.cc
-  ./src/linAlg/linearAlgebraOperationsDevice.cc
-  ./src/linAlg/pseudoGSDevice.cc
-  ./src/linAlg/rayleighRitzDevice.cc
+  ./src/linAlg/linearAlgebraOperationsDeviceKernels.cc
   ./src/force/forceWfcContractionsDeviceKernels.cc
-  ./utils/constraintMatrixInfoDevice.cc
+  ./utils/constraintMatrixInfoDeviceKernels.cc
   ./utils/DeviceAPICalls.hip.cc
   ./utils/BLASWrapperDevice.hip.cc
   ./utils/MPICommunicatorP2PKernelsDevice.cc
-  ./src/solvers/linearSolverDevice.cc
-  ./src/solvers/linearSolverCGDevice.cc
-  ./src/solvers/linearSolverProblemDevice.cc
-  ./src/poisson/poissonSolverProblemDevice.cc
-  ./src/helmholtz/kerkerSolverProblemDevice.cc
+  ./src/solvers/linearSolverCGDeviceKernels.cc
+  ./src/poisson/poissonSolverProblemDeviceKernels.cc
   ./utils/FEBasisOperationsKernelsInternalDevice.cc
   ./src/atom/AtomicCenteredNonLocalOperatorKernelsDevice.cc
   )
@@ -279,7 +290,7 @@ IF (WITH_GPU)
     ENDIF()
 ENDIF()
 IF (WITH_GPU)
-  LIST(APPEND TARGET_SRC ${DEVICE_SRC})
+  LIST(APPEND TARGET_SRC ${DEVICE_HOST_SRC} ${DEVICE_SRC})
 ENDIF()
 
 
 
@@ -24,7 +24,7 @@
 #include <DeviceDataTypeOverloads.h>
 #include <DeviceTypeConfig.h>
 #include <DeviceKernelLauncherConstants.h>
-
+#include <MemoryStorage.h>
 namespace dftfe
 {
   namespace AtomicCenteredNonLocalOperatorKernelsDevice
 
@@ -24,7 +24,9 @@
 #include <TypeConfig.h>
 #include <DeviceTypeConfig.h>
 #include <cmath>
-
+#if defined(DFTFE_WITH_DEVICE)
+#  include "Exceptions.h"
+#endif
 namespace dftfe
 {
   namespace linearAlgebra
@@ -665,6 +667,7 @@ namespace dftfe
     private:
     };
 #if defined(DFTFE_WITH_DEVICE)
+#  include "Exceptions.h"
     enum class tensorOpDataType
     {
       fp32,
 
@@ -204,8 +204,8 @@ namespace dftfe
 
     void
     setVEffExternalPotCorrToZero();
-  private:
 
+  private:
     std::shared_ptr<
       AtomicCenteredNonLocalOperator<dataTypes::number, memorySpace>>
       d_ONCVnonLocalOperator;
 
@@ -0,0 +1,55 @@
+#ifndef constraintMatrixInfoDeviceKernels_H
+#define constraintMatrixInfoDeviceKernels_H
+#include <DeviceAPICalls.h>
+#include <DeviceDataTypeOverloads.h>
+#include <DeviceKernelLauncherConstants.h>
+
+
+namespace dftfe
+{
+  // Declare dftUtils functions
+  namespace dftUtils
+  {
+    template <typename ValueType>
+    void
+    distributeDevice(
+      const unsigned int  contiguousBlockSize,
+      ValueType *         xVec,
+      const unsigned int *constraintLocalRowIdsUnflattened,
+      const unsigned int  numConstraints,
+      const unsigned int *constraintRowSizes,
+      const unsigned int *constraintRowSizesAccumulated,
+      const unsigned int *constraintLocalColumnIdsAllRowsUnflattened,
+      const double *      constraintColumnValuesAllRowsUnflattened,
+      const double *      inhomogenities);
+
+    template <typename ValueType>
+    void
+    distributeSlaveToMasterAtomicAddDevice(
+      const unsigned int  contiguousBlockSize,
+      ValueType *         xVec,
+      const unsigned int *constraintLocalRowIdsUnflattened,
+      const unsigned int  numConstraints,
+      const unsigned int *constraintRowSizes,
+      const unsigned int *constraintRowSizesAccumulated,
+      const unsigned int *constraintLocalColumnIdsAllRowsUnflattened,
+      const double *      constraintColumnValuesAllRowsUnflattened);
+
+    template <typename ValueType>
+    void
+    setzeroDevice(const unsigned int  contiguousBlockSize,
+                  ValueType *         xVec,
+                  const unsigned int *constraintLocalRowIdsUnflattened,
+                  const unsigned int  numConstraints);
+    void
+    scaleConstraintsDevice(
+      const double *      xVec,
+      const unsigned int *constraintLocalRowIdsUnflattened,
+      const unsigned int  numConstraints,
+      const unsigned int *constraintRowSizes,
+      const unsigned int *constraintRowSizesAccumulated,
+      const unsigned int *constraintLocalColumnIdsAllRowsUnflattened,
+      double *            constraintColumnValuesAllRowsUnflattened);
+  } // namespace dftUtils
+} // namespace dftfe
+#endif
@@ -21,7 +21,7 @@
 #include <headers.h>
 #include "dftParameters.h"
 #include "FEBasisOperations.h"
-
+#include "densityCalculatorDeviceKernels.h"
 namespace dftfe
 {
   template <typename NumberType, dftfe::utils::MemorySpace memorySpace>
@@ -59,15 +59,12 @@ namespace dftfe
   template <typename NumberType>
   void
   computeRhoGradRhoFromInterpolatedValues(
-    std::shared_ptr<
-      dftfe::basis::
-        FEBasisOperations<NumberType, double, dftfe::utils::MemorySpace::HOST>>
-      &basisOperationsPtr,
     std::shared_ptr<
       dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::HOST>>
       &                                         BLASWrapperPtr,
     const std::pair<unsigned int, unsigned int> cellRange,
     const std::pair<unsigned int, unsigned int> vecRange,
+    const unsigned int                          nQuadsPerCell,
     double *                                    partialOccupVec,
     NumberType *                                wfcQuadPointData,
     NumberType *                                gradWfcQuadPointData,
@@ -77,29 +74,5 @@ namespace dftfe
     double *                                    gradRho,
     const bool                                  isEvaluateGradRho);
 
-#if defined(DFTFE_WITH_DEVICE)
-  template <typename NumberType>
-  void
-  computeRhoGradRhoFromInterpolatedValues(
-    std::shared_ptr<
-      dftfe::basis::FEBasisOperations<NumberType,
-                                      double,
-                                      dftfe::utils::MemorySpace::DEVICE>>
-      &basisOperationsPtr,
-    std::shared_ptr<
-      dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
-      &                                         BLASWrapperPtr,
-    const std::pair<unsigned int, unsigned int> cellRange,
-    const std::pair<unsigned int, unsigned int> vecRange,
-    double *                                    partialOccupVec,
-    NumberType *                                wfcQuadPointData,
-    NumberType *                                gradWfcQuadPointData,
-    double *                                    rhoCellsWfcContributions,
-    double *                                    gradRhoCellsWfcContributions,
-    double *                                    rho,
-    double *                                    gradRho,
-    const bool                                  isEvaluateGradRho);
-#endif
-
 } // namespace dftfe
 #endif
@@ -0,0 +1,85 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (c) 2017-2025 The Regents of the University of Michigan and DFT-FE
+// authors.
+//
+// This file is part of the DFT-FE code.
+//
+// The DFT-FE code is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the DFT-FE distribution.
+//
+// ---------------------------------------------------------------------
+//
+
+#ifndef densityCalculatorDeviceKernels_H_
+#define densityCalculatorDeviceKernels_H_
+#if defined(DFTFE_WITH_DEVICE)
+
+#  include <BLASWrapper.h>
+#  include <DataTypeOverloads.h>
+#  include <DeviceAPICalls.h>
+#  include <DeviceDataTypeOverloads.h>
+#  include <DeviceTypeConfig.h>
+#  include <DeviceKernelLauncherConstants.h>
+#  include <memory>
+namespace dftfe
+{
+  template <typename NumberType>
+  void
+  computeRhoGradRhoFromInterpolatedValues(
+    std::shared_ptr<
+      dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
+      &                                         BLASWrapperPtr,
+    const std::pair<unsigned int, unsigned int> cellRange,
+    const std::pair<unsigned int, unsigned int> vecRange,
+    const unsigned int                          nQuadsPerCell,
+    double *                                    partialOccupVec,
+    NumberType *                                wfcQuadPointData,
+    NumberType *                                gradWfcQuadPointData,
+    double *                                    rhoCellsWfcContributions,
+    double *                                    gradRhoCellsWfcContributions,
+    double *                                    rho,
+    double *                                    gradRho,
+    const bool                                  isEvaluateGradRho);
+
+  template <typename NumberType>
+  void
+  computeRhoResponseFromInterpolatedValues(
+    std::shared_ptr<
+      dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
+      &                                         BLASWrapperPtr,
+    const std::pair<unsigned int, unsigned int> cellRange,
+    const std::pair<unsigned int, unsigned int> vecRange,
+    const unsigned int                          nQuadsPerCell,
+    double *                                    onesVec,
+    double *                                    partialOccupVecPrime,
+    NumberType *                                wfcQuadPointData,
+    NumberType *                                wfcPrimeQuadPointData,
+    double *rhoResponseHamCellsWfcContributions,
+    double *rhoResponseFermiEnergyCellsWfcContributions,
+    double *rhoResponseHam,
+    double *rhoResponseFermiEnergy);
+
+  template <typename NumberType>
+  void
+  computeKineticEnergyDensityFromInterpolatedValues(
+    const dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>
+      &                                         BLASWrapperPtr,
+    const std::pair<unsigned int, unsigned int> cellRange,
+    const std::pair<unsigned int, unsigned int> vecRange,
+    const unsigned int                          nQuadsPerCell,
+    double *                                    partialOccupVec,
+    double *                                    kcoord,
+    NumberType *                                wfcQuadPointData,
+    NumberType *                                gradWfcQuadPointData,
+    double *        kineticEnergyCellsWfcContributions,
+    double *        kineticEnergyDensity,
+    const MPI_Comm &mpiCommDomain);
+
+} // namespace dftfe
+#endif
+#endif
@@ -23,6 +23,7 @@
 #include "dftParameters.h"
 #include "FEBasisOperations.h"
 #include <BLASWrapper.h>
+#include "densityCalculatorDeviceKernels.h"
 
 
 namespace dftfe
@@ -57,15 +58,12 @@ namespace dftfe
   template <typename NumberType>
   void
   computeRhoResponseFromInterpolatedValues(
-    std::shared_ptr<
-      dftfe::basis::
-        FEBasisOperations<NumberType, double, dftfe::utils::MemorySpace::HOST>>
-      &basisOperationsPtr,
     std::shared_ptr<
       dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::HOST>>
       &                                         BLASWrapperPtr,
     const std::pair<unsigned int, unsigned int> cellRange,
     const std::pair<unsigned int, unsigned int> vecRange,
+    const unsigned int                          nQuadsPerCell,
     double *                                    onesVec,
     double *                                    partialOccupPrimeVec,
     NumberType *                                wfcQuadPointData,
@@ -75,29 +73,5 @@ namespace dftfe
     double *rhoResponseHam,
     double *rhoResponseFermiEnergy);
 
-#if defined(DFTFE_WITH_DEVICE)
-  template <typename NumberType>
-  void
-  computeRhoResponseFromInterpolatedValues(
-    std::shared_ptr<
-      dftfe::basis::FEBasisOperations<NumberType,
-                                      double,
-                                      dftfe::utils::MemorySpace::DEVICE>>
-      &basisOperationsPtr,
-    std::shared_ptr<
-      dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
-      &                                         BLASWrapperPtr,
-    const std::pair<unsigned int, unsigned int> cellRange,
-    const std::pair<unsigned int, unsigned int> vecRange,
-    double *                                    onesVec,
-    double *                                    partialOccupVecPrime,
-    NumberType *                                wfcQuadPointData,
-    NumberType *                                wfcPrimeQuadPointData,
-    double *rhoResponseHamCellsWfcContributions,
-    double *rhoResponseFermiEnergyCellsWfcContributions,
-    double *rhoResponseHam,
-    double *rhoResponseFermiEnergy);
-#endif
-
 } // namespace dftfe
 #endif
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@`
`24`	`24`	`#include <DeviceDataTypeOverloads.h>`
`25`	`25`	`#include <DeviceTypeConfig.h>`
`26`	`26`	`#include <DeviceKernelLauncherConstants.h>`
`27`		`-`
	`27`	`+#include <MemoryStorage.h>`
`28`	`28`	`namespace dftfe`
`29`	`29`	`{`
`30`	`30`	`namespace AtomicCenteredNonLocalOperatorKernelsDevice`