Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
minmingzhu committed Nov 1, 2024
1 parent 321d15b commit d38a4e4
Show file tree
Hide file tree
Showing 28 changed files with 56 additions and 118 deletions.
41 changes: 20 additions & 21 deletions mllib-dal/src/main/native/CCLInitSingleton.hpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
/*******************************************************************************
* Copyright 2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
* Copyright 2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#pragma once
#include <iostream>
Expand All @@ -21,10 +21,10 @@
#include "Logger.h"

class CCLInitSingleton {
public:
public:
ccl::shared_ptr_class<ccl::kvs> kvs;

static CCLInitSingleton &get(int size, int rank, ccl::string ccl_ip_port) {
static CCLInitSingleton& get(int size, int rank, ccl::string ccl_ip_port) {
static std::once_flag flag;
static CCLInitSingleton instance;
std::call_once(flag, [size, rank, ccl_ip_port] {
Expand All @@ -33,8 +33,9 @@ class CCLInitSingleton {
return instance;
}

private:
CCLInitSingleton() {}
private:
CCLInitSingleton() {
}

CCLInitSingleton(int size, int rank, ccl::string ccl_ip_port) {
auto t1 = std::chrono::high_resolution_clock::now();
Expand All @@ -48,9 +49,7 @@ class CCLInitSingleton {

auto t2 = std::chrono::high_resolution_clock::now();
auto duration =
(float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 -
t1)
.count();
(float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();

logger::println(logger::INFO, "OneCCL singleton init took %f secs",
duration / 1000);
Expand Down
4 changes: 2 additions & 2 deletions mllib-dal/src/main/native/CorrelationImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,7 @@ JNIEXPORT jlong JNICALL
Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL(
JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows,
jlong numCols, jint executorNum, jint executorCores,
jint computeDeviceOrdinal, jintArray gpuIdxArray, jstring ip_port,
jobject resultObj) {
jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"oneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down Expand Up @@ -229,6 +228,7 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL(
logger::println(logger::INFO,
"oneDAL (native): use GPU kernels with rankid %d",
rank);

auto comm = getDalComm();
doCorrelationOneAPICompute(env, pNumTabData, numRows, numCols, comm,
resultObj);
Expand Down
4 changes: 2 additions & 2 deletions mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif
jint treeCount, jint numFeaturesPerNode, jint minObservationsLeafNode,
jint minObservationsSplitNode, jdouble minWeightFractionLeafNode,
jdouble minImpurityDecreaseSplitNode, jint maxTreeDepth, jlong seed,
jint maxBins, jboolean bootstrap, jintArray gpuIdxArray, jstring ip_port,
jint maxBins, jboolean bootstrap, jintArray gpuIdxArray,
jobject resultObj) {
logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels");

Expand All @@ -316,6 +316,7 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif
logger::println(logger::INFO,
"oneDAL (native): use GPU kernels with rankid %d",
rank);

auto comm = getDalComm();
jobject hashmapObj = doRFClassifierOneAPICompute(
env, pNumTabFeature, featureRows, featureCols, pNumTabLabel,
Expand All @@ -324,7 +325,6 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif
minObservationsSplitNode, minWeightFractionLeafNode,
minImpurityDecreaseSplitNode, maxTreeDepth, seed, maxBins,
bootstrap, comm, resultObj);
env->ReleaseStringUTFChars(ip_port, str);
return hashmapObj;
}
default: {
Expand Down
4 changes: 2 additions & 2 deletions mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra
jint executorNum, jint computeDeviceOrdinal, jint treeCount,
jint numFeaturesPerNode, jint minObservationsLeafNode, jint maxTreeDepth,
jlong seed, jint maxbins, jboolean bootstrap, jintArray gpuIdxArray,
jstring ip_port, jobject resultObj) {
jobject resultObj) {
logger::println(logger::INFO,
"OneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand All @@ -308,13 +308,13 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra
logger::println(logger::INFO,
"OneDAL (native): use GPU kernels with rankid %d",
rank);

auto comm = getDalComm();
jobject hashmapObj = doRFRegressorOneAPICompute(
env, pNumTabFeature, featureRows, featureCols, pNumTabLabel,
labelCols, executorNum, computeDeviceOrdinal, treeCount,
numFeaturesPerNode, minObservationsLeafNode, maxTreeDepth, seed,
maxbins, bootstrap, comm, resultObj);
env->ReleaseStringUTFChars(ip_port, str);
return hashmapObj;
}
default: {
Expand Down
4 changes: 2 additions & 2 deletions mllib-dal/src/main/native/KMeansImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,7 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe
JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows,
jlong numCols, jlong pNumTabCenters, jint clusterNum, jdouble tolerance,
jint iterationNum, jint executorNum, jint executorCores,
jint computeDeviceOrdinal, jintArray gpuIdxArray, jstring ip_port,
jobject resultObj) {
jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"OneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down Expand Up @@ -346,6 +345,7 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe
ret = doKMeansOneAPICompute(env, pNumTabData, numRows, numCols,
pNumTabCenters, clusterNum, tolerance,
iterationNum, comm, resultObj);

break;
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion mllib-dal/src/main/native/LinearRegressionImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra
jlong featureCols, jlong label, jlong labelCols, jboolean fitIntercept,
jdouble regParam, jdouble elasticNetParam, jint executorNum,
jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray,
jstring ip_port, jobject resultObj) {
jobject resultObj) {

logger::println(logger::INFO,
"oneDAL (native): use DPC++ kernels; device %s",
Expand Down
45 changes: 2 additions & 43 deletions mllib-dal/src/main/native/OneCCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,8 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init(
.count();
logger::println(logger::INFO, "OneCCL (native): init took %f secs",
duration / 1000);

const char *str = env->GetStringUTFChars(ip_port, 0);
ccl::string ccl_ip_port(str);
const char *device = env->GetStringUTFChars(use_device, 0);
ccl::string ccl_ip_port(str);

#ifdef CPU_ONLY_PROFILE
auto &singletonCCLInit = CCLInitSingleton::get(size, rank, ccl_ip_port);
Expand Down Expand Up @@ -123,46 +120,8 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init(
jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J");
jfieldID fid_rank_id = env->GetFieldID(cls, "rankId", "J");

env->SetLongField(param, size, comm_size);
env->SetLongField(param, rank, rank_id);
env->ReleaseStringUTFChars(ip_port, str);

return 1;
}

/*
* Class: com_intel_oap_mllib_OneCCL__
* Method: c_init
* Signature: ()I
*/
JNIEXPORT jint JNICALL
Java_com_intel_oap_mllib_OneCCL_00024_c_1initDpcpp(JNIEnv *env, jobject, jint size, jint rank, jobject param) {
logger::printerrln(logger::INFO, "OneCCL (native): init dpcpp");
auto t1 = std::chrono::high_resolution_clock::now();

ccl::init();

const char *str = env->GetStringUTFChars(ip_port, 0);
ccl::string ccl_ip_port(str);

auto &singletonCCLInit = CCLInitSingleton::get(size, rank, ccl_ip_port);

g_kvs.push_back(singletonCCLInit.kvs);


auto t2 = std::chrono::high_resolution_clock::now();
auto duration =
(float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
.count();
logger::println(logger::INFO, "OneCCL (native): init took %f secs",
duration / 1000);

jclass cls = env->GetObjectClass(param);
jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J");
jfieldID fid_rank_id = env->GetFieldID(cls, "rankId", "J");

env->SetLongField(param, size, comm_size);
env->SetLongField(param, rank, rank_id);
env->SetLongField(param, fid_comm_size, comm_size);
env->SetLongField(param, fid_rank_id, rank_id);
env->ReleaseStringUTFChars(ip_port, str);

return 1;
Expand Down
4 changes: 2 additions & 2 deletions mllib-dal/src/main/native/PCAImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,7 @@ JNIEXPORT jlong JNICALL
Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL(
JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows,
jlong numCols, jint executorNum, jint executorCores,
jint computeDeviceOrdinal, jintArray gpuIdxArray, jstring ip_port,
jobject resultObj) {
jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"oneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down Expand Up @@ -281,6 +280,7 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL(
logger::println(logger::INFO,
"oneDAL (native): use GPU kernels with rankid %d",
rank);

auto comm = getDalComm();
doPCAOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj);
break;
Expand Down
2 changes: 1 addition & 1 deletion mllib-dal/src/main/native/Profile.hpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#pragma once

#include "Logger.h"
#include <chrono>
#include <iostream>
#include <string>
#include "Logger.h"

class Profiler {
public:
Expand Down
3 changes: 1 addition & 2 deletions mllib-dal/src/main/native/SummarizerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,7 @@ JNIEXPORT jlong JNICALL
Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL(
JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows,
jlong numCols, jint executorNum, jint executorCores,
jint computeDeviceOrdinal, jintArray gpuIdxArray, jstring ip_port,
jobject resultObj) {
jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"oneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion mllib-dal/src/main/native/service.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ using namespace daal::data_management;
typedef std::vector<daal::byte> ByteBuffer;
typedef float GpuAlgorithmFPType; /* Algorithm floating-point type */
typedef double CpuAlgorithmFPType; /* Algorithm floating-point type */

enum class ComputeDevice { host, cpu, gpu, uninitialized };
const std::string ComputeDeviceString[] = {"HOST", "CPU", "GPU"};

Expand Down
8 changes: 3 additions & 5 deletions mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ object OneCCL extends Logging {

def init(executor_num: Int, rank: Int, ip_port: String): Unit = {

setExecutorEnv()

logInfo(s"Initializing with IP_PORT: ${ip_port}")

// cclParam is output from native code
Expand All @@ -41,9 +39,9 @@ object OneCCL extends Logging {
s"commSize, ${cclParam.getCommSize}, rankId: ${cclParam.getRankId}")
}

// Run on Executor
def setExecutorEnv(key: String, value: String): Unit = {
setEnv(key, value)
// Sets the specified value to allow each executor to run on the specified GPU
def setAffinityMask(rankId: String): Unit = {
setEnv("ZE_AFFINITY_MASK", rankId)
}

// Run on Executor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ class RandomForestClassifierDALImpl(val uid: String,
maxBins,
bootstrap,
gpuIndices,
kvsIPPort,
result)

val computeEndTime = System.nanoTime()
Expand Down Expand Up @@ -159,7 +158,6 @@ class RandomForestClassifierDALImpl(val uid: String,
maxBins: Int,
bootstrap: Boolean,
gpuIndices: Array[Int],
kvsIPPort: String,
result: RandomForestResult):
java.util.HashMap[java.lang.Integer, java.util.ArrayList[LearningNode]]
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ class KMeansDALImpl(var nClusters: Int,
executorCores,
computeDevice.ordinal(),
gpuIndices,
kvsIPPort,
result
)

Expand All @@ -105,9 +104,7 @@ class KMeansDALImpl(var nClusters: Int,
} else {
Iterator.empty
}
if (useDevice == "CPU") {
OneCCL.cleanup()
}
OneCCL.cleanup()
ret
}.collect()

Expand Down Expand Up @@ -148,6 +145,5 @@ class KMeansDALImpl(var nClusters: Int,
executorCores: Int,
computeDeviceOrdinal: Int,
gpuIndices: Array[Int],
kvsIPPort: String,
result: KMeansResult): Long
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ class PCADALImpl(val k: Int,
executorCores,
computeDevice.ordinal(),
gpuIndices,
kvsIPPort,
result
)

Expand Down Expand Up @@ -219,6 +218,5 @@ class PCADALImpl(val k: Int,
executorCores: Int,
computeDeviceOrdinal: Int,
gpuIndices: Array[Int],
kvsIPPort: String,
result: PCAResult): Long
}
Loading

0 comments on commit d38a4e4

Please sign in to comment.