Skip to content

Commit 5fd082b

Browse files
fschlimb authored and GitHub Enterprise committed
Merge pull request IntelPython#7 from SAT/feature/logreg
adding logistic regression and fixing indents in generated code
2 parents 793910f + c801ad5 commit 5fd082b

File tree

10 files changed

+11436
-3
lines changed

10 files changed

+11436
-3
lines changed

examples/data/batch/binary_cls_test.csv

Lines changed: 2000 additions & 0 deletions
Large diffs are not rendered by default.

examples/data/batch/binary_cls_train.csv

Lines changed: 8000 additions & 0 deletions
Large diffs are not rendered by default.

examples/data/batch/logreg_test.csv

Lines changed: 250 additions & 0 deletions
Large diffs are not rendered by default.

examples/data/batch/logreg_train.csv

Lines changed: 1000 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#*******************************************************************************
2+
# Copyright 2014-2018 Intel Corporation
3+
# All Rights Reserved.
4+
#
5+
# This software is licensed under the Apache License, Version 2.0 (the
6+
# "License"), the following terms apply:
7+
#
8+
# You may not use this file except in compliance with the License. You may
9+
# obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
#
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#*******************************************************************************
18+
19+
# daal4py logistic regression example for shared memory systems
20+
21+
import daal4py as d4p
22+
import numpy as np
23+
24+
# let's try to use pandas' fast csv reader
25+
try:
    import pandas

    def read_csv(f, c):
        """Read the columns `c` of the CSV file `f` with pandas.

        Returns the selected columns as a 2d numpy array.
        """
        # header=None: the numpy fallback below parses every line as data, so
        # the files are expected to have no header row. Without it pandas
        # would consume the first observation as column names.
        return pandas.read_csv(f, usecols=c, delimiter=',', header=None).values
except ImportError:
    # fall back to numpy loadtxt

    def read_csv(f, c):
        """Read the columns `c` of the CSV file `f` with numpy.

        Returns the selected columns as a 2d numpy array.
        """
        # ndmin=2 keeps a single selected column 2d, like the pandas reader.
        return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2)
31+
32+
33+
def main():
    """Run the binary logistic regression example in batch mode.

    Trains a two-class model on ./data/batch/binary_cls_train.csv, predicts
    on ./data/batch/binary_cls_test.csv and asserts that one label is
    predicted per test observation.
    """
    n_classes = 2
    n_features = 20

    # training file: 20 feature columns followed by 1 class-label column
    train_path = "./data/batch/binary_cls_train.csv"
    features = read_csv(train_path, range(n_features))
    labels = read_csv(train_path, range(n_features, n_features + 1))
    labels.shape = (features.shape[0], 1)  # must be a 2d array

    # configure the training algorithm and fit the model
    trainer = d4p.logistic_regression_training(nClasses=n_classes)
    fitted = trainer.compute(features, labels)

    # test file: only the 20 feature columns are read
    test_path = "./data/batch/binary_cls_test.csv"
    samples = read_csv(test_path, range(n_features))

    # configure the prediction algorithm and apply the trained model
    predictor = d4p.logistic_regression_prediction(nClasses=n_classes)
    outcome = predictor.compute(samples, fitted.model)

    # one predicted label per test observation
    expected_shape = (samples.shape[0], labels.shape[1])
    assert outcome.prediction.shape == expected_shape


if __name__ == "__main__":
    main()
    print('All looks good!')

examples/log_reg_dense_batch.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#*******************************************************************************
2+
# Copyright 2014-2018 Intel Corporation
3+
# All Rights Reserved.
4+
#
5+
# This software is licensed under the Apache License, Version 2.0 (the
6+
# "License"), the following terms apply:
7+
#
8+
# You may not use this file except in compliance with the License. You may
9+
# obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
#
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#*******************************************************************************
18+
19+
# daal4py logistic regression example for shared memory systems
20+
21+
import daal4py as d4p
22+
import numpy as np
23+
24+
# let's try to use pandas' fast csv reader
25+
try:
    import pandas

    def read_csv(f, c):
        """Read the columns `c` of the CSV file `f` with pandas.

        Returns the selected columns as a 2d numpy array.
        """
        # header=None: the numpy fallback below parses every line as data, so
        # the files are expected to have no header row. Without it pandas
        # would consume the first observation as column names.
        return pandas.read_csv(f, usecols=c, delimiter=',', header=None).values
except ImportError:
    # fall back to numpy loadtxt

    def read_csv(f, c):
        """Read the columns `c` of the CSV file `f` with numpy.

        Returns the selected columns as a 2d numpy array.
        """
        # ndmin=2 keeps a single selected column 2d, like the pandas reader.
        return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2)
31+
32+
33+
def main():
    """Run the multi-class logistic regression example in batch mode.

    Trains a five-class model with L1 and L2 penalties on
    ./data/batch/logreg_train.csv, predicts on ./data/batch/logreg_test.csv
    and asserts the shapes of the returned labels, probabilities and
    log-probabilities.
    """
    n_classes = 5
    n_features = 6

    # training file: 6 feature columns followed by 1 class-label column
    train_path = "./data/batch/logreg_train.csv"
    features = read_csv(train_path, range(n_features))
    labels = read_csv(train_path, range(n_features, n_features + 1))
    labels.shape = (features.shape[0], 1)  # must be a 2d array

    # configure the training algorithm and fit the model
    trainer = d4p.logistic_regression_training(
        nClasses=n_classes,
        penaltyL1=0.1,
        penaltyL2=0.1,
    )
    fitted = trainer.compute(features, labels)

    # test file: only the 6 feature columns are read
    test_path = "./data/batch/logreg_test.csv"
    samples = read_csv(test_path, range(n_features))

    # request labels, probabilities and log-probabilities from the predictor
    requested = "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
    predictor = d4p.logistic_regression_prediction(
        nClasses=n_classes,
        resultsToCompute=requested,
    )
    outcome = predictor.compute(samples, fitted.model)

    # one label per observation, one (log-)probability per observation and class
    n_samples = samples.shape[0]
    assert outcome.prediction.shape == (n_samples, labels.shape[1])
    assert outcome.probabilities.shape == (n_samples, n_classes)
    assert outcome.logProbabilities.shape == (n_samples, n_classes)


if __name__ == "__main__":
    main()
    print('All looks good!')

generator/gen_daal4py.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,10 +257,10 @@ def get_all_attrs(self, ns, cls, attr, ons=None):
257257

258258

259259
###############################################################################
260-
def to_lltype(self, ns, t):
260+
def to_lltype(self, t):
261261
"""
262262
return low level (C++ type). Usually the same as input.
263-
Only very specific casesneed a conversion.
263+
Only very specific cases need a conversion.
264264
"""
265265
if t in ['DAAL_UINT64']:
266266
return 'ResultToComputeId'
@@ -290,6 +290,9 @@ def to_hltype(self, ns, t):
290290
return ('daal::' + thens + '::ModelPtr', 'class', tns)
291291
if t in ['data_management::NumericTablePtr',] or t in ifaces.values():
292292
return ('daal::' + t, 'class', tns)
293+
if 'Batch' in self.namespace_dict[ns].classes and t in self.namespace_dict[ns].classes['Batch'].typedefs:
294+
tns, tname = splitns(self.namespace_dict[ns].classes['Batch'].typedefs[t])
295+
return (self.namespace_dict[ns].classes['Batch'].typedefs[t], 'class', tns)
293296
tt = re.sub(r'(?<!daal::)services::SharedPtr', r'daal::services::SharedPtr', t)
294297
tt = re.sub(r'(?<!daal::)algorithms::', r'daal::algorithms::', tt)
295298
if tt in ifaces.values():
@@ -612,7 +615,7 @@ def prepare_hlwrapper(self, ns, mode, func):
612615
hlt = self.to_hltype(pns, parms[p])
613616
if hlt and hlt[1] in ['stdtype', 'enum', 'class']:
614617
(hlt, hlt_type, hlt_ns) = hlt
615-
llt = self.to_lltype(*splitns(parms[p]))
618+
llt = self.to_lltype(parms[p])
616619
needed = True
617620
pval = None
618621
if hlt_type == 'enum':
@@ -814,6 +817,7 @@ def gen_daal4py(daalroot, outdir, warn_all=False):
814817
'decision_forest',
815818
'ridge_regression',
816819
'optimization_solver',
820+
'logistic_regression',
817821
])
818822
# 'ridge_regression', parametertype is a template without any need
819823
with open(jp(outdir, 'daal4py_cpp.h'), 'w') as f:

generator/wrappers.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@
4545
'algorithms::gbt::classification::prediction': {
4646
'Batch': [('nClasses', 'size_t')],
4747
},
48+
'algorithms::logistic_regression::training': {
49+
'Batch': [('nClasses', 'size_t')],
50+
},
51+
'algorithms::logistic_regression::prediction': {
52+
'Batch': [('nClasses', 'size_t')],
53+
},
4854
'algorithms::decision_tree::classification::training': {
4955
'Batch': [('nClasses', 'size_t')],
5056
},
@@ -100,6 +106,7 @@
100106
'algorithms::kmeans::init': ['nRowsTotal', 'offset',],
101107
'algorithms::gbt::regression::training': ['dependentVariables'],
102108
'algorithms::gbt::classification::training': ['weights',],
109+
'algorithms::logistic_regression::training': ['weights',],
103110
'algorithms::decision_tree::classification::training': ['weights',],
104111
'algorithms::decision_forest::classification::training': ['weights', 'updatedEngine',],
105112
'algorithms::decision_forest::regression::training': ['algorithms::regression::training::InputId', 'updatedEngine',],
@@ -123,6 +130,7 @@
123130
'classifier::training::Batch': 'daal::services::SharedPtr<daal::algorithms::classifier::training::Batch>',
124131
'engines::BatchBase': 'daal::algorithms::engines::EnginePtr',
125132
'optimization_solver::sum_of_functions::Batch': 'daal::algorithms::optimization_solver::sum_of_functions::BatchPtr',
133+
'optimization_solver::iterative_solver::Batch': 'daal::algorithms::optimization_solver::iterative_solver::BatchPtr',
126134
}
127135

128136
# By default input arguments have no default value (e.g. they are required).
@@ -391,10 +399,12 @@
391399
'algorithms::decision_tree::regression': ['Result',],
392400
'algorithms::engines::mcg59::Batch': ['ParameterType',],
393401
'algorithms::engines::mt19937::Batch': ['ParameterType',],
402+
'algorithms::engines::mt2203::Batch': ['ParameterType',],
394403
'algorithms::gbt': ['Result',],
395404
'algorithms::gbt::classification': ['Result',],
396405
'algorithms::gbt::regression': ['Result',],
397406
'algorithms::gbt::training': ['Result',],
407+
'algorithms::logistic_regression': ['Result',],
398408
'algorithms::linear_model': ['Result',],
399409
'algorithms::linear_regression': ['Result',],
400410
'algorithms::linear_regression::prediction::Batch': ['ParameterType',],

src/daal4py.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#define _HLAPI_H_INCLUDED_
1919

2020
#include <daal.h>
21+
#include "daal_compat.h"
2122

2223
#include <iostream>
2324
#include <stdexcept>

src/daal_compat.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*******************************************************************************
2+
* Copyright 2014-2018 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*******************************************************************************/
16+
17+
#ifndef _DAALCOMPAT_H_INCLUDED_
18+
#define _DAALCOMPAT_H_INCLUDED_
19+
20+
#include <services/library_version_info.h>
21+
#include <services/daal_shared_ptr.h>
22+
23+
// DAAL versions before 2019 are what we are looking for.
24+
// Some DAAL versions seem broken, e.g. '2199' so we need to check that, too
25+
#if __INTEL_DAAL__ < 2019 || __INTEL_DAAL__ > 2100
26+
27+
namespace daal {
28+
namespace algorithms {
29+
namespace optimization_solver {
30+
namespace iterative_solver {
31+
// BatchPtr typedef not existent in older DAAL versions
32+
typedef daal::services::SharedPtr<interface1::Batch> BatchPtr;
33+
34+
}
35+
}
36+
}
37+
}
38+
39+
#endif
40+
41+
#endif // _DAALCOMPAT_H_INCLUDED_

0 commit comments

Comments
 (0)