From 2240810662d048400a75014b199b9bd799254c62 Mon Sep 17 00:00:00 2001 From: xfate123 Date: Sat, 16 May 2020 10:57:53 -0800 Subject: [PATCH 01/37] add gcp storage to xgboost-operator, still working on in --- config/samples/xgboost-dist/main.py | 12 +++++++++--- config/samples/xgboost-dist/utils.py | 25 +++++++++++++++++++++---- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/config/samples/xgboost-dist/main.py b/config/samples/xgboost-dist/main.py index 19311911..81d4a003 100644 --- a/config/samples/xgboost-dist/main.py +++ b/config/samples/xgboost-dist/main.py @@ -21,10 +21,10 @@ def main(args): model_storage_type = args.model_storage_type - if (model_storage_type == "local" or model_storage_type == "oss"): + if (model_storage_type == "local" or model_storage_type == "oss" or model_storage_typr == 'gcp'): print ( "The storage type is " + model_storage_type) else: - raise Exception("Only supports storage types like local and OSS") + raise Exception("Only supports storage types like local, OSS and GCP") if args.job_type == "Predict": logging.info("starting the predict job") @@ -66,11 +66,13 @@ def main(args): parser.add_argument( '--learning_rate', help='Learning rate for the model', + type=int, default=0.1 ) parser.add_argument( '--early_stopping_rounds', help='XGBoost argument for stopping early', + type=int, default=50 ) parser.add_argument( @@ -85,7 +87,11 @@ def main(args): ) parser.add_argument( '--oss_param', - help='oss parameter if you choose the model storage as OSS type', + help='oss parameter if you choose the model storage as OSS type' + ) + parser.add_argument( + '--gcp_param', + help='gcp parameter if you choose the model storage as GCP type' ) logging.basicConfig(format='%(message)s') diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index 283af8ba..755a0f9c 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -15,6 +15,7 @@ import xgboost as xgb import os import tempfile +from googel.cloud import storage import oss2 import json import pandas as pd @@ -59,7 +60,7 @@ def read_train_data(rank, num_workers, path): y = iris.target start, end = get_range_data(len(x), rank, num_workers) - x = x[start:end, :] + x = x[start:end] y = y[start:end] x = pd.DataFrame(x) @@ -87,7 +88,7 @@ def read_predict_data(rank, num_workers, path): y = iris.target start, end = get_range_data(len(x), rank, num_workers) - x = x[start:end, :] + x = x[start:end] y = y[start:end] x = pd.DataFrame(x) y = pd.DataFrame(y) @@ -113,7 +114,7 @@ def get_range_data(num_row, rank, num_workers): x_start = rank * num_per_partition x_end = (rank + 1) * num_per_partition - if x_end > num_row: + if x_end > num_row or (rank==num_workers-1 and x_end< num_row): x_end = num_row return x_start, x_end @@ -140,10 +141,18 @@ def dump_model(model, type, model_path, args): oss_param = parse_parameters(args.oss_param, ",", ":") if oss_param is None: raise Exception("Please config oss parameter to store model") - + return False oss_param['path'] = args.model_path dump_model_to_oss(oss_param, model) logging.info("Dump model into oss place %s", args.model_path) + elif type == 'gcp': + gcp_param = parse_parameters(args.gcp_param, ','.':') + if gcp_param is None: + raise Exception('Please config gcp parameter to store model') + return False + gcp_param['path'] = args.model_path + dump_model_to_gcp(gcp_param, model) + logging.info('Dump model into gcp place %s', args.model_path) return True @@ -171,6 +180,14 @@ def read_model(type, model_path, args): model = read_model_from_oss(oss_param) logging.info("read model from oss place %s", model_path) + elif type == 'gcp': + gcp_param = parse_parameters(args.gcp_param,',',':') + if gcp_param is None: + raise Exception('Please config gcp to read model') + return False + gcp_param['path'] = args.model_path + model = read_model_from_gcp(args.gcp_param) + logging.info('read model from gcp place %s', model_path) return model From ba00fb4f5008336b1e6930a9f464a1f8ae8c6774 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 12:50:33 -0700 Subject: [PATCH 02/37] Update utils.py draft updated. Appreciate further review --- config/samples/xgboost-dist/utils.py | 71 +++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index 755a0f9c..0f1e78b3 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -16,6 +16,7 @@ import os import tempfile from googel.cloud import storage +from oauth2client.service_account import ServiceAccountCredentials import oss2 import json import pandas as pd @@ -230,6 +231,37 @@ def dump_model_to_oss(oss_parameters, booster): return False return True +def dump_model_to_gcp(gcp_parameter,booster): + model_fname = os.path.join(tempfile.mkdtemp(), 'model') + text_model_fname = os.path.join(tempfile.mkdtemp(), 'model.text') + feature_importance = os.path.join(tempfile.mkdtemp(), + 'feature_importance.json') + + gcp_path = gcp_parameters['path'] + logger.info('---- export model ----') + booster.save_model(model_fname) + booster.dump_model(text_model_fname) + fscore_dict = booster.get_fscore() + with open(feature_importance, 'w') as file: + file.write(json.dumps(fscore_dict)) + logger.info('---- chief dump model successfully!') + + if os.path.exists(model_fname): + logger.info('---- Upload Model start...') + + while gcp_path[-1] == '/': + gcp_path = gcp_path[:-1] + + upload_gcp(gcp_parameters, model_fname, gcp_path) + aux_path = gcp_path + '_dir/' + upload_gcp(gcp_parameters, model_fname, aux_path) + upload_gcp(gcp_parameters, text_model_fname, aux_path) + upload_gcp(gcp_parameters, feature_importance, aux_path) + else: + raise Exception("fail to generate model") + return False + + return True def upload_oss(kw, local_file, oss_path): @@ -254,6 +286,23 @@ def upload_oss(kw, local_file, oss_path): except Exception(): raise ValueError('upload %s to %s failed' % (os.path.abspath(local_file), oss_path)) +def upload_gcp(kw, local_file, gcp_path): + if gcp_path[-1] == '/': + gcp_path = '%s%s' % (gcp_path, os.path.basename(local_file)) + credentials_dict = { + 'type': kw['type'], + 'client_id': kw['client_id'], + 'client_email': kw['client_email'] + 'private_key_id':kw['private_key_id'] + 'private_key': kw['private_key'] + } + credentials=ServiceAccountCredentials.from_json_keyfile_dict(credential_dict) + client = storage.Client(credentials=credentials) + bucket=storage.get_bucket(kw['access_bucket']) + blob=bucket.blob(gcp_path) + blob.upload_from_filename(local_file) + + def read_model_from_oss(kw): @@ -280,7 +329,27 @@ def read_model_from_oss(kw): bst.load_model(temp_model_fname) return bst - +def read_model_from_gcp(kw): + credentials_dict = { + 'type': kw['type'], + 'client_id': kw['client_id'], + 'client_email': kw['client_email'] + 'private_key_id':kw['private_key_id'] + 'private_key': kw['private_key'] + } + credentials=ServiceAccountCredentials.from_json_keyfile_dict(credential_dict) + client = storage.Client(credentials=credentials) + bucket=storage.get_bucket(kw['access_bucket']) + gcp_path = kw["path"] + blob = bucket.blob(gcp_path) + temp_model_fname = os.path.join(tempfile.mkdtemp(), 'local_model') + try: + blob.download_to_filename(temp_model_fname) + logger.info("success to load model from gcp %s", gcp_path) + except Exception as e: + logging.error("fail to load model: " + e) + raise Exception("fail to load model from gcp %s", gcp_path) + def parse_parameters(input, splitter_between, splitter_in): """ From f3d861994713f9e60b72fb69b4815ad6d46e0321 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:09:51 -0700 Subject: [PATCH 03/37] Update utils.py --- config/samples/xgboost-dist/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index 0f1e78b3..12caaf5a 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -207,7 +207,7 @@ def dump_model_to_oss(oss_parameters, booster): 'feature_importance.json') oss_path = oss_parameters['path'] - logger.info('---- export model ----') + logger.info('---- export model to OSS----') booster.save_model(model_fname) booster.dump_model(text_model_fname) # format output model fscore_dict = booster.get_fscore() @@ -226,6 +226,7 @@ def dump_model_to_oss(oss_parameters, booster): upload_oss(oss_parameters, model_fname, aux_path) upload_oss(oss_parameters, text_model_fname, aux_path) upload_oss(oss_parameters, feature_importance, aux_path) + logger.info('---- model uploaded to OSS successfully!----') else: raise Exception("fail to generate model") return False @@ -238,7 +239,7 @@ def dump_model_to_gcp(gcp_parameter,booster): 'feature_importance.json') gcp_path = gcp_parameters['path'] - logger.info('---- export model ----') + logger.info('---- export model to GCP----') booster.save_model(model_fname) booster.dump_model(text_model_fname) fscore_dict = booster.get_fscore() @@ -257,6 +258,7 @@ def dump_model_to_gcp(gcp_parameter,booster): upload_gcp(gcp_parameters, model_fname, aux_path) upload_gcp(gcp_parameters, text_model_fname, aux_path) upload_gcp(gcp_parameters, feature_importance, aux_path) + logger.info('---- model uploaded to GCP successfully!----') else: raise Exception("fail to generate model") return False From d904b9ce228cc7c985219fe41b8118c16a0d85bf Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:20:09 -0700 Subject: [PATCH 04/37] Update xgboostjob_v1alpha1_iris_predict.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_predict.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict.yaml index 3f3391ac..f854eab3 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict.yaml @@ -21,8 +21,8 @@ spec: args: - --job_type=Predict - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=oss - - --oss_param=unknown + - --model_storage_type=gcp + - --gcp_param=unknown Worker: replicas: 2 restartPolicy: ExitCode @@ -40,7 +40,7 @@ spec: args: - --job_type=Predict - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=oss - - --oss_param=unknown + - --model_storage_type=gcp + - --gcp_param=unknown From 813e3ccd715cf98e49a2a203f7a79a982c6b1ab8 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:23:36 -0700 Subject: [PATCH 05/37] Update and rename xgboostjob_v1alpha1_iris_predict.yaml to xgboostjob_v1alpha1_iris_predict_oss.yaml --- ...ict.yaml => xgboostjob_v1alpha1_iris_predict_oss.yaml} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename config/samples/xgboost-dist/{xgboostjob_v1alpha1_iris_predict.yaml => xgboostjob_v1alpha1_iris_predict_oss.yaml} (87%) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml similarity index 87% rename from config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict.yaml rename to config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml index f854eab3..3f3391ac 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml @@ -21,8 +21,8 @@ spec: args: - --job_type=Predict - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=gcp - - --gcp_param=unknown + - --model_storage_type=oss + - --oss_param=unknown Worker: replicas: 2 restartPolicy: ExitCode @@ -40,7 +40,7 @@ spec: args: - --job_type=Predict - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=gcp - - --gcp_param=unknown + - --model_storage_type=oss + - --oss_param=unknown From 4056365889dd0cf166fe0da1135ae815a54e79e2 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:25:13 -0700 Subject: [PATCH 06/37] Rename xgboostjob_v1alpha1_iris_train.yaml to xgboostjob_v1alpha1_iris_train_oss.yaml --- ...a1_iris_train.yaml => xgboostjob_v1alpha1_iris_train_oss.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename config/samples/xgboost-dist/{xgboostjob_v1alpha1_iris_train.yaml => xgboostjob_v1alpha1_iris_train_oss.yaml} (100%) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml similarity index 100% rename from config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train.yaml rename to config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml From bae957b1e53576e2f1ab918700db41f7243f9705 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:28:15 -0700 Subject: [PATCH 07/37] Create xgboostjob_v1alpha1_iris_train_gcr.yaml --- .../xgboostjob_v1alpha1_iris_train_gcr.yaml | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcr.yaml diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcr.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcr.yaml new file mode 100644 index 00000000..3b547f52 --- /dev/null +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcr.yaml @@ -0,0 +1,47 @@ +apiVersion: "xgboostjob.kubeflow.org/v1alpha1" +kind: "XGBoostJob" +metadata: + name: "xgboost-dist-iris-test-train" +spec: + xgbReplicaSpecs: + Master: + replicas: 1 + restartPolicy: Never + template: + apiVersion: v1 + kind: Pod + spec: + containers: + - name: xgboostjob + image: docker.io/merlintang/xgboost-dist-iris:1.1 + ports: + - containerPort: 9991 + name: xgboostjob-port + imagePullPolicy: Always + args: + - --job_type=Train + - --xgboost_parameter=objective:multi:softprob,num_class:3 + - --n_estimators=10 + - --learning_rate=0.1 + - --model_path=autoAI/xgb-opt/2 + - --model_storage_type=gcr + - --gcr_param=unknown + Worker: + replicas: 2 + restartPolicy: ExitCode + template: + apiVersion: v1 + kind: Pod + spec: + containers: + - name: xgboostjob + image: docker.io/merlintang/xgboost-dist-iris:1.1 + ports: + - containerPort: 9991 + name: xgboostjob-port + imagePullPolicy: Always + args: + - --job_type=Train + - --xgboost_parameter="objective:multi:softprob,num_class:3" + - --n_estimators=10 + - --learning_rate=0.1 From b583c1b406e69ea6844ac1fd1473682cea120100 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:29:45 -0700 Subject: [PATCH 08/37] Create xgboostjob_v1alpha1_iris_predict_gcr.yaml --- .../xgboostjob_v1alpha1_iris_predict_gcr.yaml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcr.yaml diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcr.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcr.yaml new file mode 100644 index 00000000..6a71622f --- /dev/null +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcr.yaml @@ -0,0 +1,44 @@ +apiVersion: "xgboostjob.kubeflow.org/v1alpha1" +kind: "XGBoostJob" +metadata: + name: "xgboost-dist-iris-test-predict" +spec: + xgbReplicaSpecs: + Master: + replicas: 1 + restartPolicy: Never + template: + apiVersion: v1 + kind: Pod + spec: + containers: + - name: xgboostjob + image: docker.io/merlintang/xgboost-dist-iris:1.1 + ports: + - containerPort: 9991 + name: xgboostjob-port + imagePullPolicy: Always + args: + - --job_type=Predict + - --model_path=autoAI/xgb-opt/2 + - --model_storage_type=gcr + - --gcr_param=unknown + Worker: + replicas: 2 + restartPolicy: ExitCode + template: + apiVersion: v1 + kind: Pod + spec: + containers: + - name: xgboostjob + image: docker.io/merlintang/xgboost-dist-iris:1.1 + ports: + - containerPort: 9991 + name: xgboostjob-port + imagePullPolicy: Always + args: + - --job_type=Predict + - --model_path=autoAI/xgb-opt/2 + - --model_storage_type=gcr + - --gcr_param=unknown From 758ec4ee614c6932b08dae6967342e7febbb37fa Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:32:21 -0700 Subject: [PATCH 09/37] Update and rename xgboostjob_v1alpha1_iris_predict_gcr.yaml to xgboostjob_v1alpha1_iris_predict_gcp.yaml --- ...gcr.yaml => xgboostjob_v1alpha1_iris_predict_gcp.yaml} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename config/samples/xgboost-dist/{xgboostjob_v1alpha1_iris_predict_gcr.yaml => xgboostjob_v1alpha1_iris_predict_gcp.yaml} (87%) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcr.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml similarity index 87% rename from config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcr.yaml rename to config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml index 6a71622f..ea47de27 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcr.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml @@ -21,8 +21,8 @@ spec: args: - --job_type=Predict - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=gcr - - --gcr_param=unknown + - --model_storage_type=gcp + - --gcp_param=unknown Worker: replicas: 2 restartPolicy: ExitCode @@ -40,5 +40,5 @@ spec: args: - --job_type=Predict - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=gcr - - --gcr_param=unknown + - --model_storage_type=gcp + - --gcp_param=unknown From 4125851e13cec8299b6bfe9b4f7fe89e046a3a3f Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:33:04 -0700 Subject: [PATCH 10/37] Update and rename xgboostjob_v1alpha1_iris_train_gcr.yaml to xgboostjob_v1alpha1_iris_train_gcp.yaml --- ...train_gcr.yaml => xgboostjob_v1alpha1_iris_train_gcp.yaml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename config/samples/xgboost-dist/{xgboostjob_v1alpha1_iris_train_gcr.yaml => xgboostjob_v1alpha1_iris_train_gcp.yaml} (94%) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcr.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml similarity index 94% rename from config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcr.yaml rename to config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml index 3b547f52..84dc4ce9 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcr.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml @@ -24,8 +24,8 @@ spec: - --n_estimators=10 - --learning_rate=0.1 - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=gcr - - --gcr_param=unknown + - --model_storage_type=gcp + - --gcp_param=unknown Worker: replicas: 2 restartPolicy: ExitCode From 9a0e65522e00d106da5c626bafe85f6057f6deb5 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 13:59:10 -0700 Subject: [PATCH 11/37] Update README.md --- config/samples/xgboost-dist/README.md | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/config/samples/xgboost-dist/README.md b/config/samples/xgboost-dist/README.md index 0756a920..74a90c6a 100644 --- a/config/samples/xgboost-dist/README.md +++ b/config/samples/xgboost-dist/README.md @@ -25,14 +25,19 @@ The following files are available to setup distributed XGBoost computation runti To store the model in OSS: -* xgboostjob_v1alpha1_iris_train.yaml -* xgboostjob_v1alpha1_iris_predict.yaml +* xgboostjob_v1alpha1_iris_train_oss.yaml +* xgboostjob_v1alpha1_iris_predict_oss.yaml + +To store the model in GCP: +* xgboostjob_v1alpha1_iris_train_gcp.yaml +* xgboostjob_v1alpha1_iris_predict_gcp.yaml To store the model in local path: * xgboostjob_v1alpha1_iris_train_local.yaml * xgboostjob_v1alpha1_iris_predict_local.yaml +**Configure OSS parameter** For training jobs in OSS , you could configure xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml Note, we use [OSS](https://www.alibabacloud.com/product/oss) to store the trained model, thus, you need to specify the OSS parameter in the yaml file. Therefore, remember to fill the OSS parameter in xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml file. @@ -40,11 +45,19 @@ The oss parameter includes the account information such as access_id, access_key For Eg: --oss_param=endpoint:http://oss-ap-south-1.aliyuncs.com,access_id:XXXXXXXXXXX,access_key:XXXXXXXXXXXXXXXXXXX,access_bucket:XXXXXX Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost job batch prediction. +**Configure GCP parameter** +For training jobs in GCP , you could configure xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml +Note, we use [GCP](https://cloud.google.com/) to store the trained model, +thus, you need to specify the GCP parameter in the yaml file. Therefore, remember to fill the GCP parameter in xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml file. +The oss parameter includes the account information such as type, client_id, client_email,private_key_id,private_key and access_bucket. +For Eg: +--gcp_param=type:XXXXXXX,client_id:XXXXXXXX,client_email:XXXXXXXXXX@gmail.com,private_key_id: XXXXXXXXXXXXX,private_key:XXXXXXXXXXXXXXX, access_bucket:XXXXXX +Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost job batch prediction. **Start the distributed XGBoost train to store the model in OSS** ``` -kubectl create -f xgboostjob_v1alpha1_iris_train.yaml +kubectl create -f xgboostjob_v1alpha1_iris_train_oss.yaml ``` **Look at the train job status** @@ -156,7 +169,7 @@ Events: **Start the distributed XGBoost job predict** ```shell -kubectl create -f xgboostjob_v1alpha1_iris_predict.yaml +kubectl create -f xgboostjob_v1alpha1_iris_predict_oss.yaml ``` **Look at the batch predict job status** From a2a1702d0aae4c43c569f080f08602db38e86515 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 14:00:16 -0700 Subject: [PATCH 12/37] Update xgboostjob_v1alpha1_iris_train_gcp.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml index 84dc4ce9..74dac53a 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml @@ -1,7 +1,7 @@ apiVersion: "xgboostjob.kubeflow.org/v1alpha1" kind: "XGBoostJob" metadata: - name: "xgboost-dist-iris-test-train" + name: "xgboost-dist-iris-test-train-gcp" spec: xgbReplicaSpecs: Master: From 05675a1e74e2ef8af9dc6ddcafe7e7dd03957e56 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 14:00:57 -0700 Subject: [PATCH 13/37] Update xgboostjob_v1alpha1_iris_predict_gcp.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml index ea47de27..9f44f0f2 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml @@ -1,7 +1,7 @@ apiVersion: "xgboostjob.kubeflow.org/v1alpha1" kind: "XGBoostJob" metadata: - name: "xgboost-dist-iris-test-predict" + name: "xgboost-dist-iris-test-predict-gcp" spec: xgbReplicaSpecs: Master: From dc71d6b898f049d5c1c8f0b7f43aa358806332ce Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 14:01:42 -0700 Subject: [PATCH 14/37] Update xgboostjob_v1alpha1_iris_predict_oss.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml index 3f3391ac..10ae7ad5 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml @@ -1,7 +1,7 @@ apiVersion: "xgboostjob.kubeflow.org/v1alpha1" kind: "XGBoostJob" metadata: - name: "xgboost-dist-iris-test-predict" + name: "xgboost-dist-iris-test-predict-oss" spec: xgbReplicaSpecs: Master: From cf309d8e70cfbca222736773a24c0cf811b9ab21 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 14:02:03 -0700 Subject: [PATCH 15/37] Update xgboostjob_v1alpha1_iris_train_oss.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml index d08c3242..effbb36a 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml @@ -1,7 +1,7 @@ apiVersion: "xgboostjob.kubeflow.org/v1alpha1" kind: "XGBoostJob" metadata: - name: "xgboost-dist-iris-test-train" + name: "xgboost-dist-iris-test-train-oss" spec: xgbReplicaSpecs: Master: From ead85636edc37da66293d95c971fe8894809d601 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 14:12:06 -0700 Subject: [PATCH 16/37] Update README.md --- config/samples/xgboost-dist/README.md | 30 ++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/config/samples/xgboost-dist/README.md b/config/samples/xgboost-dist/README.md index 74a90c6a..86fa81a0 100644 --- a/config/samples/xgboost-dist/README.md +++ b/config/samples/xgboost-dist/README.md @@ -55,14 +55,24 @@ For Eg: Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost job batch prediction. -**Start the distributed XGBoost train to store the model in OSS** +**Start the distributed XGBoost train to store the model in cloud** +if you use OSS ``` kubectl create -f xgboostjob_v1alpha1_iris_train_oss.yaml ``` +if you use GCP +``` +kubectl create -f xgboostjob_v1alpha1_iris_train_gcp.yaml +``` **Look at the train job status** +If you use OSS +``` + kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-train-oss + ``` +If you use GCP ``` - kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-train + kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-train-gcp ``` Here is a sample output when the job is finished. The output log like this ``` @@ -167,14 +177,24 @@ Events: Normal XGBoostJobSucceeded 47s xgboostjob-operator XGBoostJob xgboost-dist-iris-test is successfully completed. ``` -**Start the distributed XGBoost job predict** -```shell +**Start the distributed XGBoost job predict in cloud** +If you use OSS +``` kubectl create -f xgboostjob_v1alpha1_iris_predict_oss.yaml ``` +If you use GCP +``` +kubectl create -f xgboostjob_v1alpha1_iris_predict_GCP.yaml +``` **Look at the batch predict job status** +If you use OSS +``` + kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-predict-oss + ``` +If you use GCP ``` - kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-predict + kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-predict-gcp ``` Here is a sample output when the job is finished. The output log like this ``` From fcf83ec2b3d53e12fe65a4d65f2371215ad828ae Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 14:17:03 -0700 Subject: [PATCH 17/37] Update README.md --- config/samples/xgboost-dist/README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/config/samples/xgboost-dist/README.md b/config/samples/xgboost-dist/README.md index 86fa81a0..82100d8a 100644 --- a/config/samples/xgboost-dist/README.md +++ b/config/samples/xgboost-dist/README.md @@ -56,16 +56,18 @@ Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost jo **Start the distributed XGBoost train to store the model in cloud** -if you use OSS + +If you use OSS ``` kubectl create -f xgboostjob_v1alpha1_iris_train_oss.yaml ``` -if you use GCP +If you use GCP ``` kubectl create -f xgboostjob_v1alpha1_iris_train_gcp.yaml ``` **Look at the train job status** + If you use OSS ``` kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-train-oss @@ -178,13 +180,14 @@ Events: ``` **Start the distributed XGBoost job predict in cloud** + If you use OSS ``` kubectl create -f xgboostjob_v1alpha1_iris_predict_oss.yaml ``` If you use GCP ``` -kubectl create -f xgboostjob_v1alpha1_iris_predict_GCP.yaml +kubectl create -f xgboostjob_v1alpha1_iris_predict_gcp.yaml ``` **Look at the batch predict job status** From ef7a7d082edd8769e55409e52f969dd10bb57afe Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 14:18:57 -0700 Subject: [PATCH 18/37] Update README.md --- config/samples/xgboost-dist/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/config/samples/xgboost-dist/README.md b/config/samples/xgboost-dist/README.md index 82100d8a..64743dbe 100644 --- a/config/samples/xgboost-dist/README.md +++ b/config/samples/xgboost-dist/README.md @@ -45,6 +45,7 @@ The oss parameter includes the account information such as access_id, access_key For Eg: --oss_param=endpoint:http://oss-ap-south-1.aliyuncs.com,access_id:XXXXXXXXXXX,access_key:XXXXXXXXXXXXXXXXXXX,access_bucket:XXXXXX Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost job batch prediction. + **Configure GCP parameter** For training jobs in GCP , you could configure xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml Note, we use [GCP](https://cloud.google.com/) to store the trained model, From 9b5d2142b5d8581d6bcd6064bd6a708056455daf Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 16:08:01 -0700 Subject: [PATCH 19/37] Update README.md --- config/samples/xgboost-dist/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/samples/xgboost-dist/README.md b/config/samples/xgboost-dist/README.md index 64743dbe..39da07d9 100644 --- a/config/samples/xgboost-dist/README.md +++ b/config/samples/xgboost-dist/README.md @@ -40,7 +40,7 @@ To store the model in local path: **Configure OSS parameter** For training jobs in OSS , you could configure xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml Note, we use [OSS](https://www.alibabacloud.com/product/oss) to store the trained model, -thus, you need to specify the OSS parameter in the yaml file. Therefore, remember to fill the OSS parameter in xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml file. +thus, you need to specify the OSS parameter in the yaml file. Therefore, remember to fill the OSS parameter in xgboostjob_v1alpha1_iris_train_oss.yaml and xgboostjob_v1alpha1_iris_predict_oss.yaml file. The oss parameter includes the account information such as access_id, access_key, access_bucket and endpoint. For Eg: --oss_param=endpoint:http://oss-ap-south-1.aliyuncs.com,access_id:XXXXXXXXXXX,access_key:XXXXXXXXXXXXXXXXXXX,access_bucket:XXXXXX @@ -49,8 +49,8 @@ Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost jo **Configure GCP parameter** For training jobs in GCP , you could configure xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml Note, we use [GCP](https://cloud.google.com/) to store the trained model, -thus, you need to specify the GCP parameter in the yaml file. Therefore, remember to fill the GCP parameter in xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml file. -The oss parameter includes the account information such as type, client_id, client_email,private_key_id,private_key and access_bucket. +thus, you need to specify the GCP parameter in the yaml file. Therefore, remember to fill the GCP parameter in xgboostjob_v1alpha1_iris_train_gcp.yaml and xgboostjob_v1alpha1_iris_predict_gcp.yaml file. +The gcp parameter includes the account information such as type, client_id, client_email,private_key_id,private_key and access_bucket. For Eg: --gcp_param=type:XXXXXXX,client_id:XXXXXXXX,client_email:XXXXXXXXXX@gmail.com,private_key_id: XXXXXXXXXXXXX,private_key:XXXXXXXXXXXXXXX, access_bucket:XXXXXX Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost job batch prediction. From 309eee4aee5cc5f574bb92fe9a768bea607baf54 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 17:41:26 -0700 Subject: [PATCH 20/37] Update utils.py --- config/samples/xgboost-dist/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index 12caaf5a..b0aa97f6 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -147,7 +147,7 @@ def dump_model(model, type, model_path, args): dump_model_to_oss(oss_param, model) logging.info("Dump model into oss place %s", args.model_path) elif type == 'gcp': - gcp_param = parse_parameters(args.gcp_param, ','.':') + gcp_param = parse_parameters(args.gcp_param, ',',':') if gcp_param is None: raise Exception('Please config gcp parameter to store model') return False From fb48969b5af6077fd439af6738c90528b9302b73 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 18:43:39 -0700 Subject: [PATCH 21/37] Update utils.py --- config/samples/xgboost-dist/utils.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index b0aa97f6..826d2edd 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -291,13 +291,11 @@ def upload_oss(kw, local_file, oss_path): def upload_gcp(kw, local_file, gcp_path): if gcp_path[-1] == '/': gcp_path = '%s%s' % (gcp_path, os.path.basename(local_file)) - credentials_dict = { - 'type': kw['type'], - 'client_id': kw['client_id'], - 'client_email': kw['client_email'] - 'private_key_id':kw['private_key_id'] - 'private_key': kw['private_key'] - } + credentials_dict = {'type': kw['type'], + 'client_id': kw['client_id'], + 'client_email': kw['client_email'], + 'private_key_id':kw['private_key_id'], + 'private_key': kw['private_key'], } credentials=ServiceAccountCredentials.from_json_keyfile_dict(credential_dict) client = storage.Client(credentials=credentials) bucket=storage.get_bucket(kw['access_bucket']) From af63ce3e8bd08da79e443c3bed77cecf001cb440 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 21:27:01 -0700 Subject: [PATCH 22/37] Update requirements.txt --- config/samples/xgboost-dist/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/requirements.txt b/config/samples/xgboost-dist/requirements.txt index 60841a31..cfcee2c7 100644 --- a/config/samples/xgboost-dist/requirements.txt +++ b/config/samples/xgboost-dist/requirements.txt @@ -6,4 +6,5 @@ scipy>=1.1.0 joblib>=0.13.2 scikit-learn>=0.20 oss2>=2.7.0 -pandas>=0.24.2 \ No newline at end of file +google>=2.0.3 +pandas>=0.24.2 From ca9bed0a387a567a7ede3c2b62de6120187ecf3f Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 21:29:06 -0700 Subject: [PATCH 23/37] Update requirements.txt --- config/samples/xgboost-dist/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/config/samples/xgboost-dist/requirements.txt b/config/samples/xgboost-dist/requirements.txt index cfcee2c7..0e9b8d31 100644 --- a/config/samples/xgboost-dist/requirements.txt +++ b/config/samples/xgboost-dist/requirements.txt @@ -8,3 +8,4 @@ scikit-learn>=0.20 oss2>=2.7.0 google>=2.0.3 pandas>=0.24.2 +oauth2client>=2.0 From 0c2246865496d3f0e7de29da101d017e985374b4 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 21:30:45 -0700 Subject: [PATCH 24/37] Update utils.py --- config/samples/xgboost-dist/utils.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index 826d2edd..927cffba 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -291,12 +291,16 @@ def upload_oss(kw, local_file, oss_path): def upload_gcp(kw, local_file, gcp_path): if gcp_path[-1] == '/': gcp_path = '%s%s' % (gcp_path, os.path.basename(local_file)) - credentials_dict = {'type': kw['type'], - 'client_id': kw['client_id'], - 'client_email': kw['client_email'], - 'private_key_id':kw['private_key_id'], - 'private_key': kw['private_key'], } - credentials=ServiceAccountCredentials.from_json_keyfile_dict(credential_dict) + credentials_dict = { + 'type': kw['type'], + 'client_id': kw['client_id'], + 'client_email': kw['client_email'], + 'private_key_id':kw['private_key_id'], + 'private_key': kw['private_key'], + } + credentials=ServiceAccountCredentials.from_json_keyfile_dict( + credential_dict + ) client = storage.Client(credentials=credentials) bucket=storage.get_bucket(kw['access_bucket']) blob=bucket.blob(gcp_path) @@ -330,14 +334,16 @@ def read_model_from_oss(kw): return bst def read_model_from_gcp(kw): - credentials_dict = { + credentials_dict = { 'type': kw['type'], 'client_id': kw['client_id'], - 'client_email': kw['client_email'] - 'private_key_id':kw['private_key_id'] - 'private_key': kw['private_key'] + 'client_email': kw['client_email'], + 'private_key_id':kw['private_key_id'], + 'private_key': kw['private_key'], } - credentials=ServiceAccountCredentials.from_json_keyfile_dict(credential_dict) + credentials=ServiceAccountCredentials.from_json_keyfile_dict( + credential_dict + ) client = storage.Client(credentials=credentials) bucket=storage.get_bucket(kw['access_bucket']) gcp_path = kw["path"] From 1db400cf5b71c93c14e96fe7d0f596b4d041875b Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 21:41:54 -0700 Subject: [PATCH 25/37] Update requirements.txt --- config/samples/xgboost-dist/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/requirements.txt b/config/samples/xgboost-dist/requirements.txt index 0e9b8d31..d3643842 100644 --- a/config/samples/xgboost-dist/requirements.txt +++ b/config/samples/xgboost-dist/requirements.txt @@ -6,6 +6,6 @@ scipy>=1.1.0 joblib>=0.13.2 scikit-learn>=0.20 oss2>=2.7.0 -google>=2.0.3 +google-cloud-storage 1.28.1 pandas>=0.24.2 oauth2client>=2.0 From ca5522821d73ef0d4099c51a3a4665d524057be4 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 21:45:27 -0700 Subject: [PATCH 26/37] Update requirements.txt --- config/samples/xgboost-dist/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/requirements.txt b/config/samples/xgboost-dist/requirements.txt index d3643842..70f9dda7 100644 --- a/config/samples/xgboost-dist/requirements.txt +++ b/config/samples/xgboost-dist/requirements.txt @@ -6,6 +6,6 @@ scipy>=1.1.0 joblib>=0.13.2 scikit-learn>=0.20 oss2>=2.7.0 -google-cloud-storage 1.28.1 +google-cloud-storage>=1.28.1 pandas>=0.24.2 oauth2client>=2.0 From 24902745ebb47132a0d63023d13fe1cda984c256 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 22:06:55 -0700 Subject: [PATCH 27/37] Update xgboostjob_v1alpha1_iris_predict_gcp.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml index 9f44f0f2..9f602530 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_gcp.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 ports: - containerPort: 9991 name: xgboostjob-port @@ -32,7 +32,7 @@ spec: spec: containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 ports: - containerPort: 9991 name: xgboostjob-port From eeb1049466e4d8edb438612647304c6e69d8368a Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 22:07:30 -0700 Subject: [PATCH 28/37] Update xgboostjob_v1alpha1_iris_predict_local.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_predict_local.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_local.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_local.yaml index d19112cd..f3286ac5 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_local.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_local.yaml @@ -17,7 +17,7 @@ spec: claimName: xgboostlocal containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model @@ -42,7 +42,7 @@ spec: claimName: xgboostlocal containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model From fc7543f4d23db030bf69fa599418990e3854986d Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 22:08:05 -0700 Subject: [PATCH 29/37] Update xgboostjob_v1alpha1_iris_predict_oss.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml index 10ae7ad5..58bfdbb2 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_predict_oss.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 ports: - containerPort: 9991 name: xgboostjob-port @@ -32,7 +32,7 @@ spec: spec: containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 ports: - containerPort: 9991 name: xgboostjob-port From 8d6cf3c9101c5753e49f60281a2b1ffa6a306104 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 22:08:46 -0700 Subject: [PATCH 30/37] Update xgboostjob_v1alpha1_iris_train_gcp.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml index 74dac53a..3b9f66da 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_gcp.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 ports: - containerPort: 9991 name: xgboostjob-port @@ -35,7 +35,7 @@ spec: spec: containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 ports: - containerPort: 9991 name: xgboostjob-port From 341bd49dafc945245ee5bd61919b9867cbc00338 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 22:09:30 -0700 Subject: [PATCH 31/37] Update xgboostjob_v1alpha1_iris_train_local.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_train_local.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_local.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_local.yaml index 2d96c725..fe35a92e 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_local.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_local.yaml @@ -17,7 +17,7 @@ spec: claimName: xgboostlocal containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model @@ -45,7 +45,7 @@ spec: claimName: xgboostlocal containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model From c8185e7a057480f3353d64cf1d71707e032bcbe4 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sat, 16 May 2020 22:10:03 -0700 Subject: [PATCH 32/37] Update xgboostjob_v1alpha1_iris_train_oss.yaml --- .../xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml index effbb36a..e4e4b6b6 100644 --- a/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml +++ b/config/samples/xgboost-dist/xgboostjob_v1alpha1_iris_train_oss.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 ports: - containerPort: 9991 name: xgboostjob-port @@ -35,7 +35,7 @@ spec: spec: containers: - name: xgboostjob - image: docker.io/merlintang/xgboost-dist-iris:1.1 + image: docker.io/xfate123/xgboost-dist-iris:1.1 ports: - containerPort: 9991 name: xgboostjob-port From 925e26f0863590616639781410b39533d5e5ec4a Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sun, 17 May 2020 12:30:19 -0700 Subject: [PATCH 33/37] Update utils.py --- config/samples/xgboost-dist/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index 927cffba..93fa451c 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -232,7 +232,7 @@ def dump_model_to_oss(oss_parameters, booster): return False return True -def dump_model_to_gcp(gcp_parameter,booster): +def dump_model_to_gcp(gcp_parameters,booster): model_fname = os.path.join(tempfile.mkdtemp(), 'model') text_model_fname = os.path.join(tempfile.mkdtemp(), 'model.text') feature_importance = os.path.join(tempfile.mkdtemp(), From a313d2a56328f3bb01ba2071390085622dcad701 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sun, 17 May 2020 12:31:21 -0700 Subject: [PATCH 34/37] Update main.py --- config/samples/xgboost-dist/main.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/config/samples/xgboost-dist/main.py b/config/samples/xgboost-dist/main.py index 81d4a003..e60a0c24 100644 --- a/config/samples/xgboost-dist/main.py +++ b/config/samples/xgboost-dist/main.py @@ -21,7 +21,7 @@ def main(args): model_storage_type = args.model_storage_type - if (model_storage_type == "local" or model_storage_type == "oss" or model_storage_typr == 'gcp'): + if (model_storage_type == "local" or model_storage_type == "oss" or model_storage_type == 'gcp'): print ( "The storage type is " + model_storage_type) else: raise Exception("Only supports storage types like local, OSS and GCP") @@ -60,13 +60,11 @@ def main(args): parser.add_argument( '--n_estimators', help='Number of trees in the model', - type=int, default=1000 ) parser.add_argument( '--learning_rate', help='Learning rate for the model', - type=int, default=0.1 ) parser.add_argument( From cf82e5ad2162ad24662ebe7ead848e159a3859d6 Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sun, 17 May 2020 13:31:49 -0700 Subject: [PATCH 35/37] Update utils.py --- config/samples/xgboost-dist/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index 93fa451c..cdefb84b 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -292,7 +292,7 @@ def upload_gcp(kw, local_file, gcp_path): if gcp_path[-1] == '/': gcp_path = '%s%s' % (gcp_path, os.path.basename(local_file)) credentials_dict = { - 'type': kw['type'], + 'type': 'service_account', 'client_id': kw['client_id'], 'client_email': kw['client_email'], 'private_key_id':kw['private_key_id'], @@ -335,7 +335,7 @@ def read_model_from_oss(kw): return bst def read_model_from_gcp(kw): credentials_dict = { - 'type': kw['type'], + 'type': 'service_account', 'client_id': kw['client_id'], 'client_email': kw['client_email'], 'private_key_id':kw['private_key_id'], From e57465a256ce8623993c8d78076c1cfda419c99e Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sun, 17 May 2020 14:54:34 -0700 Subject: [PATCH 36/37] Update utils.py --- config/samples/xgboost-dist/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/samples/xgboost-dist/utils.py b/config/samples/xgboost-dist/utils.py index cdefb84b..100732ce 100644 --- a/config/samples/xgboost-dist/utils.py +++ b/config/samples/xgboost-dist/utils.py @@ -299,7 +299,7 @@ def upload_gcp(kw, local_file, gcp_path): 'private_key': kw['private_key'], } credentials=ServiceAccountCredentials.from_json_keyfile_dict( - credential_dict + credentials_dict ) client = storage.Client(credentials=credentials) bucket=storage.get_bucket(kw['access_bucket']) @@ -342,7 +342,7 @@ def read_model_from_gcp(kw): 'private_key': kw['private_key'], } credentials=ServiceAccountCredentials.from_json_keyfile_dict( - credential_dict + credentials_dict ) client = storage.Client(credentials=credentials) bucket=storage.get_bucket(kw['access_bucket']) From 06d2992cbe486358e1f4f89ee8e32df4484ce0bf Mon Sep 17 00:00:00 2001 From: xfate123 <61609849+xfate123@users.noreply.github.com> Date: Sun, 17 May 2020 20:46:13 -0700 Subject: [PATCH 37/37] Update README.md --- config/samples/xgboost-dist/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/samples/xgboost-dist/README.md b/config/samples/xgboost-dist/README.md index 39da07d9..83708048 100644 --- a/config/samples/xgboost-dist/README.md +++ b/config/samples/xgboost-dist/README.md @@ -38,22 +38,22 @@ To store the model in local path: * xgboostjob_v1alpha1_iris_predict_local.yaml **Configure OSS parameter** -For training jobs in OSS , you could configure xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml +For training jobs in OSS , you could configure xgboostjob_v1alpha1_iris_train_oss.yaml and xgboostjob_v1alpha1_iris_predict_oss.yaml Note, we use [OSS](https://www.alibabacloud.com/product/oss) to store the trained model, thus, you need to specify the OSS parameter in the yaml file. Therefore, remember to fill the OSS parameter in xgboostjob_v1alpha1_iris_train_oss.yaml and xgboostjob_v1alpha1_iris_predict_oss.yaml file. The oss parameter includes the account information such as access_id, access_key, access_bucket and endpoint. For Eg: --oss_param=endpoint:http://oss-ap-south-1.aliyuncs.com,access_id:XXXXXXXXXXX,access_key:XXXXXXXXXXXXXXXXXXX,access_bucket:XXXXXX -Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost job batch prediction. +Similarly, xgboostjob_v1alpha1_iris_predict_oss.yaml is used to configure XGBoost job batch prediction. **Configure GCP parameter** -For training jobs in GCP , you could configure xgboostjob_v1alpha1_iris_train.yaml and xgboostjob_v1alpha1_iris_predict.yaml +For training jobs in GCP , you could configure xgboostjob_v1alpha1_iris_train_gcp.yaml and xgboostjob_v1alpha1_iris_predict_gcp.yaml Note, we use [GCP](https://cloud.google.com/) to store the trained model, thus, you need to specify the GCP parameter in the yaml file. Therefore, remember to fill the GCP parameter in xgboostjob_v1alpha1_iris_train_gcp.yaml and xgboostjob_v1alpha1_iris_predict_gcp.yaml file. The gcp parameter includes the account information such as type, client_id, client_email,private_key_id,private_key and access_bucket. For Eg: --gcp_param=type:XXXXXXX,client_id:XXXXXXXX,client_email:XXXXXXXXXX@gmail.com,private_key_id: XXXXXXXXXXXXX,private_key:XXXXXXXXXXXXXXX, access_bucket:XXXXXX -Similarly, xgboostjob_v1alpha1_iris_predict.yaml is used to configure XGBoost job batch prediction. +Similarly, xgboostjob_v1alpha1_iris_predict_gcp.yaml is used to configure XGBoost job batch prediction. **Start the distributed XGBoost train to store the model in cloud**