@@ -399,6 +399,9 @@ MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS = MLLIB_COMMON_OPTS + [
399399
400400# Generalized Linear Model (GLM) Tests #
401401MLLIB_GLM_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
402+ # The scale factor for the noise in feature values.
403+ # Currently ignored for regression.
404+ OptionSet("feature-noise", [1.0]),
402405 # The number of features per example
403406 OptionSet("num-features", [10000], can_scale=False),
404407 # The number of iterations for SGD
@@ -410,11 +413,6 @@ MLLIB_GLM_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
410413 # Regularization parameter
411414 OptionSet("reg-param", [0.1])
412415]
413- if MLLIB_SPARK_VERSION >= 1.1:
414- MLLIB_GLM_TEST_OPTS += [
415- # Optimization algorithm: sgd, l-bfgs
416- OptionSet("optimizer", ["sgd", "l-bfgs"])
417- ]
418416if MLLIB_SPARK_VERSION >= 1.5:
419417 MLLIB_GLM_TEST_OPTS += [
420418 # Ignored, but required for config
@@ -423,6 +421,8 @@ if MLLIB_SPARK_VERSION >= 1.5:
423421
424422# GLM Regression Tests #
425423MLLIB_GLM_REGRESSION_TEST_OPTS = MLLIB_GLM_TEST_OPTS + [
424+ # Optimization algorithm: sgd
425+ OptionSet("optimizer", ["sgd"]),
426426 # The intercept for the data
427427 OptionSet("intercept", [0.0]),
428428 # The scale factor for label noise
@@ -438,6 +438,8 @@ MLLIB_TESTS += [("glm-regression", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
438438MLLIB_CLASSIFICATION_TEST_OPTS = MLLIB_GLM_TEST_OPTS + [
439439 # Expected fraction of examples which are negative
440440 OptionSet("per-negative", [0.3]),
441+ # Optimization algorithm: sgd, l-bfgs
442+ OptionSet("optimizer", ["sgd", "l-bfgs"])
441443]
442444
443445# GLM Classification Tests #
@@ -464,15 +466,15 @@ if MLLIB_SPARK_VERSION >= 1.5:
464466 OptionSet("reg-param", [0.01]),
465467 # The scale factor for the noise in feature values
466468 OptionSet("feature-noise", [1.0]),
467- # The scale factor for the noise in label values
468- OptionSet("label-noise", [0.1]),
469- # The intercept for the data
470- OptionSet("intercept", [0.2]),
471469 # The step size is not used in LBFGS, but this is required in parameter checking.
472470 OptionSet("step-size", [0.0])
473471 ]
474472
475473 MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS = MLLIB_GLM_ELASTIC_NET_TEST_OPTS + [
474+ # The scale factor for the noise in label values
475+ OptionSet("label-noise", [0.1]),
476+ # The intercept for the data
477+ OptionSet("intercept", [0.2]),
476478 # Loss to minimize: l2 (squared error)
477479 OptionSet("loss", ["l2"])
478480 ]
@@ -486,9 +488,11 @@ if MLLIB_SPARK_VERSION >= 1.5:
486488 MLLIB_TESTS += [("glm-regression", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
487489 MLLIB_JAVA_OPTS, [ConstantOption("glm-regression")] +
488490 MLLIB_GLM_ELASTIC_NET_REGRESSION_TEST_OPTS +
489- [OptionSet("num-features", [100 ], can_scale=False)])]
491+ [OptionSet("num-features", [200 ], can_scale=False)])]
490492
491493 MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS = MLLIB_GLM_ELASTIC_NET_TEST_OPTS + [
494+ # Expected fraction of examples which are negative
495+ OptionSet("per-negative", [0.3]),
492496 # In GLM classification with elastic-net regularization, only logistic loss is supported.
493497 OptionSet("loss", ["logistic"])
494498 ]
@@ -502,7 +506,7 @@ if MLLIB_SPARK_VERSION >= 1.5:
502506 MLLIB_TESTS += [("glm-classification", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
503507 MLLIB_JAVA_OPTS, [ConstantOption("glm-classification")] +
504508 MLLIB_GLM_ELASTIC_NET_CLASSIFICATION_TEST_OPTS +
505- [OptionSet("num-features", [100 ], can_scale=False)])]
509+ [OptionSet("num-features", [200 ], can_scale=False)])]
506510
507511NAIVE_BAYES_TEST_OPTS = MLLIB_REGRESSION_CLASSIFICATION_TEST_OPTS + [
508512 # The number of features per example
@@ -595,10 +599,10 @@ MLLIB_TESTS += [("als", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
595599
596600# Clustering Tests #
597601MLLIB_CLUSTERING_TEST_OPTS = MLLIB_COMMON_OPTS + [
598- # The number of points
599- OptionSet("num-points ", [1000000], can_scale=True),
602+ # The number of examples
603+ OptionSet("num-examples ", [1000000], can_scale=True),
600604 # The number of features per point
601- OptionSet("num-columns ", [10000], can_scale=False),
605+ OptionSet("num-features ", [10000], can_scale=False),
602606 # The number of centers
603607 OptionSet("num-centers", [20]),
604608 # The number of iterations for KMeans
@@ -609,8 +613,8 @@ MLLIB_TESTS += [("kmeans", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
609613 MLLIB_JAVA_OPTS, [ConstantOption("kmeans")] + MLLIB_CLUSTERING_TEST_OPTS)]
610614
611615MLLIB_GMM_TEST_OPTS = MLLIB_COMMON_OPTS + [
612- OptionSet("num-points ", [1000000], can_scale=True),
613- OptionSet("num-columns ", [100], can_scale=False),
616+ OptionSet("num-examples ", [1000000], can_scale=True),
617+ OptionSet("num-features ", [100], can_scale=False),
614618 OptionSet("num-centers", [20], can_scale=False),
615619 OptionSet("num-iterations", [20])]
616620
@@ -630,16 +634,15 @@ if MLLIB_SPARK_VERSION >= 1.4:
630634 MLLIB_TESTS += [("lda", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
631635 MLLIB_JAVA_OPTS, [ConstantOption("lda")] + MLLIB_LDA_TEST_OPTS)]
632636
633- # TODO: tune PIC test size to run in 20-30 seconds
634637MLLIB_PIC_TEST_OPTS = MLLIB_COMMON_OPTS + [
635- OptionSet("num-points ", [10000 ], can_scale=True),
636- OptionSet("node-degree", [10 ], can_scale=False),
637- OptionSet("num-centers", [20 ], can_scale=False),
638+ OptionSet("num-examples ", [10000000 ], can_scale=True),
639+ OptionSet("node-degree", [20 ], can_scale=False),
640+ OptionSet("num-centers", [40 ], can_scale=False),
638641 OptionSet("num-iterations", [20])]
639642
640643if MLLIB_SPARK_VERSION >= 1.3:
641644 MLLIB_TESTS += [("pic", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
642- MLLIB_JAVA_OPTS, [ConstantOption("pic")] + MLLIB_CLUSTERING_TEST_OPTS )]
645+ MLLIB_JAVA_OPTS, [ConstantOption("pic")] + MLLIB_PIC_TEST_OPTS )]
643646
644647# Linear Algebra Tests #
645648MLLIB_LINALG_TEST_OPTS = MLLIB_COMMON_OPTS + [
@@ -668,7 +671,7 @@ MLLIB_TESTS += [("pca", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
668671
669672MLLIB_TESTS += [("summary-statistics", MLLIB_PERF_TEST_RUNNER, SCALE_FACTOR,
670673 MLLIB_JAVA_OPTS, [ConstantOption("summary-statistics")] +
671- MLLIB_LINALG_TEST_OPTS )]
674+ MLLIB_BIG_LINALG_TEST_OPTS )]
672675
673676MLLIB_BLOCK_MATRIX_MULT_TEST_OPTS = MLLIB_COMMON_OPTS + [
674677 OptionSet("m", [20000], can_scale=True),
@@ -752,8 +755,8 @@ if MLLIB_SPARK_VERSION >= 1.3:
752755MLLIB_PREFIX_SPAN_TEST_OPTS = MLLIB_FPM_TEST_OPTS + \
753756 [OptionSet("num-sequences", [5000000], can_scale=True),
754757 OptionSet("avg-sequence-size", [5], can_scale=False),
755- OptionSet("avg-itemset-size", [1 ], can_scale=False),
756- OptionSet("num-items", [100 ], can_scale=False),
758+ OptionSet("avg-itemset-size", [2 ], can_scale=False),
759+ OptionSet("num-items", [500 ], can_scale=False),
757760 OptionSet("min-support", [0.5], can_scale=False),
758761 OptionSet("max-pattern-len", [10], can_scale=False),
759762 OptionSet("max-local-proj-db-size", [32000000], can_scale=False)]
0 commit comments