@@ -399,6 +399,9 @@ MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS = MLLIB_COMMON_OPTS + [
399
399
400
400
# Generalized Linear Model (GLM) Tests #
401
401
MLLIB_GLM_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
402
+ # The scale factor for the noise in feature values.
403
+ # Currently ignored for regression.
404
+ OptionSet("feature-noise", [1.0]),
402
405
# The number of features per example
403
406
OptionSet("num-features", [10000], can_scale=False),
404
407
# The number of iterations for SGD
@@ -410,11 +413,6 @@ MLLIB_GLM_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
410
413
# Regularization parameter
411
414
OptionSet("reg-param", [0.1])
412
415
]
413
- if MLLIB_SPARK_VERSION >= 1.1:
414
- MLLIB_GLM_TEST_OPTS += [
415
- # Optimization algorithm: sgd, l-bfgs
416
- OptionSet("optimizer", ["sgd", "l-bfgs"])
417
- ]
418
416
if MLLIB_SPARK_VERSION >= 1.5:
419
417
MLLIB_GLM_TEST_OPTS += [
420
418
# Ignored, but required for config
@@ -423,6 +421,8 @@ if MLLIB_SPARK_VERSION >= 1.5:
423
421
424
422
# GLM Regression Tests #
425
423
MLLIB_GLM_REGRESSION_TEST_OPTS = MLLIB_GLM_TEST_OPTS + [
424
+ # Optimization algorithm: sgd
425
+ OptionSet("optimizer", ["sgd"]),
426
426
# The intercept for the data
427
427
OptionSet("intercept", [0.0]),
428
428
# The scale factor for label noise
@@ -438,6 +438,8 @@ MLLIB_TESTS += [("glm-regression", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
438
438
MLLIB_CLASSIFICATION_TEST_OPTS = MLLIB_GLM_TEST_OPTS + [
439
439
# Expected fraction of examples which are negative
440
440
OptionSet("per-negative", [0.3]),
441
+ # Optimization algorithm: sgd, l-bfgs
442
+ OptionSet("optimizer", ["sgd", "l-bfgs"])
441
443
]
442
444
443
445
# GLM Classification Tests #
@@ -464,15 +466,15 @@ if MLLIB_SPARK_VERSION >= 1.5:
464
466
OptionSet("reg-param", [0.01]),
465
467
# The scale factor for the noise in feature values
466
468
OptionSet("feature-noise", [1.0]),
467
- # The scale factor for the noise in label values
468
- OptionSet("label-noise", [0.1]),
469
- # The intercept for the data
470
- OptionSet("intercept", [0.2]),
471
469
# The step size is not used by L-BFGS, but the option is still required for parameter checking.
472
470
OptionSet("step-size", [0.0])
473
471
]
474
472
475
473
MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS = MLLIB_GLM_ELASTIC_NET_TEST_OPTS + [
474
+ # The scale factor for the noise in label values
475
+ OptionSet("label-noise", [0.1]),
476
+ # The intercept for the data
477
+ OptionSet("intercept", [0.2]),
476
478
# Loss to minimize: l2 (squared error)
477
479
OptionSet("loss", ["l2"])
478
480
]
@@ -486,9 +488,11 @@ if MLLIB_SPARK_VERSION >= 1.5:
486
488
MLLIB_TESTS += [("glm-regression", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
487
489
MLLIB_JAVA_OPTS, [ConstantOption("glm-regression")] +
488
490
MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS +
489
- [OptionSet("num-features", [100 ], can_scale=False)])]
491
+ [OptionSet("num-features", [200 ], can_scale=False)])]
490
492
491
493
MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS = MLLIB_GLM_ELASTIC_NET_TEST_OPTS + [
494
+ # Expected fraction of examples which are negative
495
+ OptionSet("per-negative", [0.3]),
492
496
# In GLM classification with elastic-net regularization, only logistic loss is supported.
493
497
OptionSet("loss", ["logistic"])
494
498
]
@@ -502,7 +506,7 @@ if MLLIB_SPARK_VERSION >= 1.5:
502
506
MLLIB_TESTS += [("glm-classification", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
503
507
MLLIB_JAVA_OPTS, [ConstantOption("glm-classification")] +
504
508
MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS +
505
- [OptionSet("num-features", [100 ], can_scale=False)])]
509
+ [OptionSet("num-features", [200 ], can_scale=False)])]
506
510
507
511
NAIVE_BAYES_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
508
512
# The number of features per example
@@ -595,10 +599,10 @@ MLLIB_TESTS += [("als", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
595
599
596
600
# Clustering Tests #
597
601
MLLIB_CLUSTERING_TEST_OPTS = MLLIB_COMMON_OPTS + [
598
- # The number of points
599
- OptionSet("num-points ", [1000000], can_scale=True),
602
+ # The number of examples
603
+ OptionSet("num-examples ", [1000000], can_scale=True),
600
604
# The number of features per example
601
- OptionSet("num-columns ", [10000], can_scale=False),
605
+ OptionSet("num-features ", [10000], can_scale=False),
602
606
# The number of centers
603
607
OptionSet("num-centers", [20]),
604
608
# The number of iterations for KMeans
@@ -609,8 +613,8 @@ MLLIB_TESTS += [("kmeans", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
609
613
MLLIB_JAVA_OPTS, [ConstantOption("kmeans")] + MLLIB_CLUSTERING_TEST_OPTS)]
610
614
611
615
MLLIB_GMM_TEST_OPTS = MLLIB_COMMON_OPTS + [
612
- OptionSet("num-points ", [1000000], can_scale=True),
613
- OptionSet("num-columns ", [100], can_scale=False),
616
+ OptionSet("num-examples ", [1000000], can_scale=True),
617
+ OptionSet("num-features ", [100], can_scale=False),
614
618
OptionSet("num-centers", [20], can_scale=False),
615
619
OptionSet("num-iterations", [20])]
616
620
@@ -630,16 +634,15 @@ if MLLIB_SPARK_VERSION >= 1.4:
630
634
MLLIB_TESTS += [("lda", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
631
635
MLLIB_JAVA_OPTS, [ConstantOption("lda")] + MLLIB_LDA_TEST_OPTS)]
632
636
633
- # TODO: tune PIC test size to run in 20-30 seconds
634
637
MLLIB_PIC_TEST_OPTS = MLLIB_COMMON_OPTS + [
635
- OptionSet("num-points ", [10000 ], can_scale=True),
636
- OptionSet("node-degree", [10 ], can_scale=False),
637
- OptionSet("num-centers", [20 ], can_scale=False),
638
+ OptionSet("num-examples ", [10000000 ], can_scale=True),
639
+ OptionSet("node-degree", [20 ], can_scale=False),
640
+ OptionSet("num-centers", [40 ], can_scale=False),
638
641
OptionSet("num-iterations", [20])]
639
642
640
643
if MLLIB_SPARK_VERSION >= 1.3:
641
644
MLLIB_TESTS += [("pic", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
642
- MLLIB_JAVA_OPTS, [ConstantOption("pic")] + MLLIB_CLUSTERING_TEST_OPTS )]
645
+ MLLIB_JAVA_OPTS, [ConstantOption("pic")] + MLLIB_PIC_TEST_OPTS )]
643
646
644
647
# Linear Algebra Tests #
645
648
MLLIB_LINALG_TEST_OPTS = MLLIB_COMMON_OPTS + [
@@ -668,7 +671,7 @@ MLLIB_TESTS += [("pca", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
668
671
669
672
MLLIB_TESTS += [("summary-statistics", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
670
673
MLLIB_JAVA_OPTS, [ConstantOption("summary-statistics")] +
671
- MLLIB_LINALG_TEST_OPTS )]
674
+ MLLIB_BIG_LINALG_TEST_OPTS )]
672
675
673
676
MLLIB_BLOCK_MATRIX_MULT_TEST_OPTS = MLLIB_COMMON_OPTS + [
674
677
OptionSet("m", [20000], can_scale=True),
@@ -752,8 +755,8 @@ if MLLIB_SPARK_VERSION >= 1.3:
752
755
MLLIB_PREFIX_SPAN_TEST_OPTS = MLLIB_FPM_TEST_OPTS + \
753
756
[OptionSet("num-sequences", [5000000], can_scale=True),
754
757
OptionSet("avg-sequence-size", [5], can_scale=False),
755
- OptionSet("avg-itemset-size", [1 ], can_scale=False),
756
- OptionSet("num-items", [100 ], can_scale=False),
758
+ OptionSet("avg-itemset-size", [2 ], can_scale=False),
759
+ OptionSet("num-items", [500 ], can_scale=False),
757
760
OptionSet("min-support", [0.5], can_scale=False),
758
761
OptionSet("max-pattern-len", [10], can_scale=False),
759
762
OptionSet("max-local-proj-db-size", [32000000], can_scale=False)]
0 commit comments