accel-sim · FJShen · Mar 9, 2022 · Mar 9, 2022 · Mar 10, 2022 · Mar 10, 2022
diff --git a/libcuda/cuda_runtime_api.cc b/libcuda/cuda_runtime_api.cc
@@ -1020,8 +1020,7 @@ cudaError_t cudaLaunchInternal(const char *hostFun,
   dim3 blockDim = config.block_dim();
 
   gpgpu_t *gpu = context->get_device()->get_gpgpu();
-  checkpoint *g_checkpoint;
-  g_checkpoint = new checkpoint();
+  checkpoint checkpoint_instance;
   class memory_space *global_mem;
   global_mem = gpu->get_global_memory();
 
@@ -1030,15 +1029,15 @@ cudaError_t cudaLaunchInternal(const char *hostFun,
     snprintf(f1name, 2048, "checkpoint_files/global_mem_%d.txt",
              grid->get_uid());
 
-    g_checkpoint->load_global_mem(global_mem, f1name);
+    checkpoint_instance.load_global_mem(global_mem, f1name);
     for (int i = 0; i < gpu->resume_CTA; i++) grid->increment_cta_id();
   }
   if (gpu->resume_option == 1 && (grid->get_uid() < gpu->resume_kernel)) {
     char f1name[2048];
     snprintf(f1name, 2048, "checkpoint_files/global_mem_%d.txt",
              grid->get_uid());
 
-    g_checkpoint->load_global_mem(global_mem, f1name);
+    checkpoint_instance.load_global_mem(global_mem, f1name);
     printf("Skipping kernel %d as resuming from kernel %d\n", grid->get_uid(),
            gpu->resume_kernel);
     ctx->api->g_cuda_launch_stack.pop_back();

diff --git a/src/cuda-sim/cuda-sim.cc b/src/cuda-sim/cuda-sim.cc
@@ -40,6 +40,7 @@ typedef void *yyscan_t;
 #include <map>
 #include <set>
 #include <sstream>
+#include <memory>
 #include "../../libcuda/gpgpu_context.h"
 #include "../abstract_hardware_model.h"
 #include "../gpgpu-sim/gpu-sim.h"
@@ -284,7 +285,7 @@ void function_info::ptx_assemble() {
   // get the instructions into instruction memory...
   unsigned num_inst = m_instructions.size();
   m_instr_mem_size = MAX_INST_SIZE * (num_inst + 1);
-  m_instr_mem = new ptx_instruction *[m_instr_mem_size];
+  m_instr_mem = std::unique_ptr<ptx_instruction*[]>(new ptx_instruction* [m_instr_mem_size]);
 
   printf("GPGPU-Sim PTX: instruction assembly for function \'%s\'... ",
          m_name.c_str());

diff --git a/src/cuda-sim/ptx_ir.h b/src/cuda-sim/ptx_ir.h
@@ -38,6 +38,7 @@
 #include <map>
 #include <string>
 #include <vector>
+#include <memory> //unique_ptr
 
 //#include "ptx.tab.h"
 #include "ptx_sim.h"
@@ -1401,7 +1402,7 @@ class function_info {
   bool m_assembled;
   bool pdom_done;  // flag to check whether pdom is completed or not
   std::string m_name;
-  ptx_instruction **m_instr_mem;
+  std::unique_ptr<ptx_instruction*[]> m_instr_mem; //unique_ptr<T[]> manages a dynamically-allocated array of objects (e.g. allocated with new[])
   unsigned m_start_PC;
   unsigned m_instr_mem_size;
   std::map<std::string, param_t> m_kernel_params;

diff --git a/src/gpgpu-sim/gpu-sim.cc b/src/gpgpu-sim/gpu-sim.cc
@@ -1866,7 +1866,7 @@ void shader_core_ctx::issue_block2core(kernel_info_t &kernel) {
   function_info *kernel_func_info = kernel.entry();
   symbol_table *symtab = kernel_func_info->get_symtab();
   unsigned ctaid = kernel.get_next_cta_id_single();
-  checkpoint *g_checkpoint = new checkpoint();
+  checkpoint g_checkpoint_inst;
   for (unsigned i = start_thread; i < end_thread; i++) {
     m_threadState[i].m_cta_id = free_cta_hw_id;
     unsigned warp_id = i / m_config->warp_size;
@@ -1885,7 +1885,7 @@ void shader_core_ctx::issue_block2core(kernel_info_t &kernel) {
       char f1name[2048];
       snprintf(f1name, 2048, "checkpoint_files/local_mem_thread_%d_%d_reg.txt",
                i % cta_size, ctaid);
-      g_checkpoint->load_global_mem(m_thread[i]->m_local_mem, f1name);
+      g_checkpoint_inst.load_global_mem(m_thread[i]->m_local_mem, f1name);
     }
     //
     warps.set(warp_id);
@@ -1901,7 +1901,7 @@ void shader_core_ctx::issue_block2core(kernel_info_t &kernel) {
     char f1name[2048];
     snprintf(f1name, 2048, "checkpoint_files/shared_mem_%d.txt", ctaid);
 
-    g_checkpoint->load_global_mem(m_thread[start_thread]->m_shared_mem, f1name);
+    g_checkpoint_inst.load_global_mem(m_thread[start_thread]->m_shared_mem, f1name);
   }
   // now that we know which warps are used in this CTA, we can allocate
   // resources for use in CTA-wide barrier operations

diff --git a/src/gpgpu-sim/shader.h b/src/gpgpu-sim/shader.h
@@ -994,6 +994,14 @@ class opndcoll_rfu_t {  // operand collector based register file unit
     void init(bool sub_core_model, unsigned num_warp_scheds) {
       m_sub_core_model = sub_core_model;
       m_num_warp_scheds = num_warp_scheds;
+
+      if (m_sub_core_model) {  // the per-scheduler table is only built when sub-core mode is enabled
+        m_last_cu_set = new unsigned[m_num_warp_scheds];  // NOTE(review): no matching delete[] is visible in this hunk; confirm it is released and that init() runs only once
+        for (unsigned i = 0; i < m_num_warp_scheds; i++)
+        {
+          m_last_cu_set[i] = i * m_num_collectors / m_num_warp_scheds;  // unsigned integer division; assumes m_num_collectors is already set when init() runs - TODO confirm
+        }
+      }
     }
 
     collector_unit_t *find_ready() {

diff --git a/src/gpgpusim_entrypoint.cc b/src/gpgpusim_entrypoint.cc
@@ -342,6 +342,8 @@ gpgpu_sim *gpgpu_context::gpgpu_ptx_sim_init_perf() {
   sem_init(&(the_gpgpusim->g_sim_signal_finish), 0, 0);
   sem_init(&(the_gpgpusim->g_sim_signal_exit), 0, 0);
 
+  option_parser_destroy(opp);
+
   return the_gpgpusim->g_the_gpu;
 }