diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index 51506851f..b9f100a57 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -21,13 +21,6 @@ #include "oneapi/dal/algo/covariance.hpp" #endif -#ifndef ONEDAL_DATA_CONVERSION -#define ONEDAL_DATA_CONVERSION -#include "data_management/data_source/csv_feature_manager.h" -#include "data_management/data_source/file_data_source.h" -#undef ONEDAL_DATA_CONVERSION -#endif - #include "OneCCL.h" #include "com_intel_oap_mllib_stat_CorrelationDALImpl.h" #include "service.h" @@ -155,37 +148,6 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, } } -std::vector get_file_path(const std::string& path) { - std::vector result; - for (auto& file : fs::directory_iterator(path)){ - if(fs::is_empty(file.path())){ - continue; - }else if(file.path().extension()==".crc" || file.path().extension()==""){ - continue; - }else{ - result.push_back(file.path()); - } - } - return result; -} - -inline bool check_file(const std::string& name) { - return std::ifstream{ name }.good(); -} - -inline std::string get_data_path(const std::string& name) { - const std::vector paths = { "./data", "samples/oneapi/dpc/mpi/data" }; - - for (const auto& path : paths) { - const std::string try_path = path + "/" + name; - if (check_file(try_path)) { - return try_path; - } - } - - return name; -} - #ifdef CPU_GPU_PROFILE static void doCorrelationOneAPICompute( JNIEnv *env, jlong pNumTabData, long numRows, long numClos, @@ -193,32 +155,27 @@ static void doCorrelationOneAPICompute( jobject resultObj, sycl::queue &queue, std::string breakdown_name) { logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); - auto t1 = std::chrono::high_resolution_clock::now(); - auto input_vec = get_file_path("/home/damon/storage/DataRoot/HiBench_CSV/Correlation/Input/4000000"); - const auto train_data_file_name = get_data_path(input_vec[comm.get_rank()]); - cout << "rank id = " << comm.get_rank() << " File name: " << train_data_file_name << endl; - const auto htable = read(queue, csv::data_source{ train_data_file_name }); - comm.barrier(); - -// float *htableArray = reinterpret_cast(pNumTabData); -// logger::println(logger::INFO, "numRows was %d", numRows); -// logger::println(logger::INFO, "numClos was %d", numClos); -// -// auto data = sycl::malloc_shared(numRows * numClos, queue); -// std::cout << "table size : " << numRows * numClos << std::endl; -// logger::Logger::getInstance(breakdown_name).printLogToFile("rankID was %d, table size %ld.", comm.get_rank(), numRows * numClos ); -// queue.memcpy(data, htableArray, sizeof(float) * numRows * numClos).wait(); -// homogen_table htable{queue, data, numRows, numClos, -// detail::make_default_delete(queue)}; -// auto t2 = std::chrono::high_resolution_clock::now(); -// auto duration = -// (float)std::chrono::duration_cast(t2 - t1) -// .count(); -// logger::println(logger::INFO, -// "Correlation batch(native): create homogen table took %f secs", -// duration / 1000); -// -// logger::Logger::getInstance(breakdown_name).printLogToFile("rankID was %d, create homogen table took %f secs.", comm.get_rank(), duration / 1000 ); + + float *htableArray = reinterpret_cast(pNumTabData); + logger::println(logger::INFO, "numRows was %d", numRows); + logger::println(logger::INFO, "numClos was %d", numClos); + + auto data = sycl::malloc_shared(numRows * numClos, queue); + std::cout << "table size : " << numRows * numClos << std::endl; + logger::Logger::getInstance(breakdown_name).printLogToFile("rankID was %d, table size %ld.", comm.get_rank(), numRows * numClos ); + queue.memcpy(data, htableArray, sizeof(float) * numRows * numClos).wait(); + homogen_table htable{queue, data, numRows, numClos, + detail::make_default_delete(queue)}; + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + logger::println(logger::INFO, + "Correlation batch(native): create homogen table took %f secs", + duration / 1000); + + logger::Logger::getInstance(breakdown_name).printLogToFile("rankID was %d, create homogen table took %f secs.", comm.get_rank(), duration / 1000 ); + const auto cor_desc = covariance_gpu::descriptor{}.set_result_options( covariance_gpu::result_options::cor_matrix |