@@ -394,13 +394,13 @@ MLLIB_COMMON_OPTS = COMMON_OPTS + [
394
394
# Regression and Classification Tests #
395
395
MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS = MLLIB_COMMON_OPTS + [
396
396
# The number of rows or examples
397
- OptionSet("num-examples", [1000000], can_scale=True),
398
- # The number of features per example
399
- OptionSet("num-features", [10000], can_scale=False)
397
+ OptionSet("num-examples", [1000000], can_scale=True)
400
398
]
401
399
402
400
# Generalized Linear Model (GLM) Tests #
403
401
MLLIB_GLM_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
402
+ # The number of features per example
403
+ OptionSet("num-features", [10000], can_scale=False),
404
404
# The number of iterations for SGD
405
405
OptionSet("num-iterations", [20]),
406
406
# The step size for SGD
@@ -412,8 +412,8 @@ MLLIB_GLM_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
412
412
]
413
413
if MLLIB_SPARK_VERSION >= 1.1:
414
414
MLLIB_GLM_TEST_OPTS += [
415
- # Optimization algorithm: sgd, lbfgs
416
- OptionSet("optimizer", ["sgd", "lbfgs"])
415
+ # Optimization algorithm: sgd, l-bfgs
416
+ OptionSet("optimizer", ["sgd", "l-bfgs"])
417
417
]
418
418
if MLLIB_SPARK_VERSION >= 1.5:
419
419
MLLIB_GLM_TEST_OPTS += [
@@ -425,47 +425,19 @@ if MLLIB_SPARK_VERSION >= 1.5:
425
425
MLLIB_GLM_REGRESSION_TEST_OPTS = MLLIB_GLM_TEST_OPTS + [
426
426
# The intercept for the data
427
427
OptionSet("intercept", [0.0]),
428
- # The scale factor for the noise
429
- OptionSet("epsilon", [0.1]),
428
+ # The scale factor for label noise
429
+ OptionSet("label-noise", [0.1]),
430
430
# Loss to minimize: l2 (squared error)
431
431
OptionSet("loss", ["l2"])
432
432
]
433
433
434
434
MLLIB_TESTS += [("glm-regression", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
435
435
MLLIB_JAVA_OPTS, [ConstantOption("glm-regression")] + MLLIB_GLM_REGRESSION_TEST_OPTS)]
436
436
437
- if MLLIB_SPARK_VERSION >= 1.5:
438
- MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
439
- # Loss to minimize: l2 (squared error)
440
- OptionSet("loss", ["l2"]),
441
- # The max number of iterations for LBFGS/OWLQN
442
- OptionSet("num-iterations", [20]),
443
- # LBFGS/OWLQN is used with elastic-net regularization.
444
- OptionSet("optimizer", ["lbfgs"]),
445
- # Using elastic-net regularization.
446
- OptionSet("reg-type", ["elastic-net"]),
447
- # Runs with L2 (param = 0.0), L1 (param = 1.0).
448
- OptionSet("elastic-net-param", [0.0, 1.0]),
449
- # Regularization param (lambda)
450
- OptionSet("reg-param", [0.01]),
451
- # The scale factor for the noise
452
- OptionSet("epsilon", [0.1]),
453
- # The intercept for the data
454
- OptionSet("intercept", [0.0]),
455
- # The step size is not used in LBFGS, but this is required in parameter checking.
456
- OptionSet("step-size", [0.0])
457
- ]
458
-
459
- MLLIB_TESTS += [("glm-regression", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
460
- MLLIB_JAVA_OPTS, [ConstantOption("glm-regression")] +
461
- MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS)]
462
-
463
437
# Classification Tests #
464
438
MLLIB_CLASSIFICATION_TEST_OPTS = MLLIB_GLM_TEST_OPTS + [
465
- # Expected fraction of examples which are negative
466
- OptionSet("per-negative", [0.3]),
467
- # The scale factor for the noise in feature values
468
- OptionSet("scale-factor", [1.0])
439
+ # Expected fraction of examples which are negative
440
+ OptionSet("per-negative", [0.3]),
469
441
]
470
442
471
443
# GLM Classification Tests #
@@ -475,38 +447,70 @@ MLLIB_GLM_CLASSIFICATION_TEST_OPTS = MLLIB_CLASSIFICATION_TEST_OPTS + [
475
447
]
476
448
477
449
MLLIB_TESTS += [("glm-classification", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
478
- MLLIB_JAVA_OPTS, [ConstantOption("glm-classification")] +
479
- MLLIB_GLM_CLASSIFICATION_TEST_OPTS)]
450
+ MLLIB_JAVA_OPTS, [ConstantOption("glm-classification")] +
451
+ MLLIB_GLM_CLASSIFICATION_TEST_OPTS)]
480
452
481
453
if MLLIB_SPARK_VERSION >= 1.5:
482
- MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS = MLLIB_GLM_CLASSIFICATION_TEST_OPTS + [
483
- # In GLM classification with elastic-net regularization, only logistic loss is supported.
484
- OptionSet("loss", ["logistic"]),
454
+ MLLIB_GLM_ELASTIC_NET_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
455
+ # The max number of iterations for LBFGS/OWLQN
456
+ OptionSet("num-iterations", [20]),
485
457
# LBFGS/OWLQN is used with elastic-net regularization.
486
- OptionSet("optimizer", ["lbfgs"]),
458
+ OptionSet("optimizer", ["auto"]),
487
459
# Using elastic-net regularization.
488
460
OptionSet("reg-type", ["elastic-net"]),
489
461
# Runs with L2 (param = 0.0), L1 (param = 1.0).
490
462
OptionSet("elastic-net-param", [0.0, 1.0]),
491
463
# Regularization param (lambda)
492
464
OptionSet("reg-param", [0.01]),
493
- # The scale factor for the noise
494
- OptionSet("epsilon", [0.1]),
465
+ # The scale factor for the noise in feature values
466
+ OptionSet("feature-noise", [1.0]),
467
+ # The scale factor for the noise in label values
468
+ OptionSet("label-noise", [0.1]),
495
469
# The intercept for the data
496
- OptionSet("intercept", [0.0]),
470
+ OptionSet("intercept", [0.2]),
497
471
# The step size is not used in LBFGS, but this is required in parameter checking.
498
472
OptionSet("step-size", [0.0])
499
473
]
500
474
475
+ MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS = MLLIB_GLM_ELASTIC_NET_TEST_OPTS + [
476
+ # Loss to minimize: l2 (squared error)
477
+ OptionSet("loss", ["l2"])
478
+ ]
479
+
480
+ # Test L-BFGS
481
+ MLLIB_TESTS += [("glm-regression", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
482
+ MLLIB_JAVA_OPTS, [ConstantOption("glm-regression")] +
483
+ MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS +
484
+ [OptionSet("num-features", [10000], can_scale=False)])]
485
+ # Test normal equation solver
486
+ MLLIB_TESTS += [("glm-regression", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
487
+ MLLIB_JAVA_OPTS, [ConstantOption("glm-regression")] +
488
+ MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS +
489
+ [OptionSet("num-features", [100], can_scale=False)])]
490
+
491
+ MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS = MLLIB_GLM_ELASTIC_NET_TEST_OPTS + [
492
+ # In GLM classification with elastic-net regularization, only logistic loss is supported.
493
+ OptionSet("loss", ["logistic"])
494
+ ]
495
+
496
+ # Test L-BFGS
501
497
MLLIB_TESTS += [("glm-classification", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
502
- MLLIB_JAVA_OPTS, [ConstantOption("glm-classification")] +
503
- MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS)]
498
+ MLLIB_JAVA_OPTS, [ConstantOption("glm-classification")] +
499
+ MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS +
500
+ [OptionSet("num-features", [10000], can_scale=False)])]
501
+ # Test normal equation solver
502
+ MLLIB_TESTS += [("glm-classification", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
503
+ MLLIB_JAVA_OPTS, [ConstantOption("glm-classification")] +
504
+ MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS +
505
+ [OptionSet("num-features", [100], can_scale=False)])]
504
506
505
507
NAIVE_BAYES_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
508
+ # The number of features per example
509
+ OptionSet("num-features", [10000], can_scale=False),
506
510
# Expected fraction of examples which are negative
507
511
OptionSet("per-negative", [0.3]),
508
512
# The scale factor for the noise in feature values
509
- OptionSet("scale-factor", [1.0]),
513
+ OptionSet("feature-noise", [1.0]),
510
514
# Naive Bayes smoothing lambda.
511
515
OptionSet("nb-lambda", [1.0]),
512
516
# Model type: either multinomial or bernoulli (bernoulli only available in Spark 1.4+)
0 commit comments