Skip to content

Commit ec70e8e

Browse files
authored
Merge pull request #203 from marklogic/feature/15636-disable-spark-ui
Added --spark-show-progress-bar to make it conditional
2 parents 956ca3d + 0419726 commit ec70e8e

File tree

6 files changed

+85
-16
lines changed

6 files changed

+85
-16
lines changed

docs/common-options.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,32 @@ Flux will then log the count:
225225
[main] INFO com.marklogic.flux: Count: 1000
226226
```
227227

228+
## Viewing progress
229+
230+
For commands that write data to MarkLogic - which includes all import, copy, and reprocess commands - Flux will log
231+
messages at the `INFO` level showing progress in terms of how much data has been sent to MarkLogic. For import and
232+
copy commands, Flux defaults to logging a message every time approximately 10,000 documents are written. For the
233+
reprocess command, Flux defaults to logging a message every time approximately 10,000 items are reprocessed by
234+
MarkLogic. These values can be adjusted via the `--log-progress` option. The guides on import, copy, and reprocess
235+
provide further details on logging progress.
236+
237+
For export commands, Flux does not log progress as it does not always have visibility into how much data has been
238+
written to an external data source (whereas it does have visibility into how much data has been written to
239+
MarkLogic). You can instead include the `--spark-show-progress-bar` option to enable the progress bar provided
240+
by the underlying [Apache Spark software](https://spark.apache.org/). The Spark progress bar provides details at a
241+
lower level that may not reveal how much data has been exported, but it will at least avoid
242+
the impression that Flux is stuck. Typically, the best approach for monitoring progress for export commands
243+
will be to examine the target destination to see how much data has been written.
244+
228245
## Viewing a stacktrace
229246

230247
When a command fails, Flux will stop execution of the command and display an error message. If you wish to see the
231248
underlying stacktrace associated with the error, run the command with the `--stacktrace` option included. This is
232249
included primarily for debugging purposes, as the stacktrace may be fairly long with only a small portion of it
233250
potentially being helpful. The initial error message displayed by Flux is intended to be as helpful as possible.
251+
252+
## Configuring logging
253+
254+
Flux uses a [Log4J2 properties file](https://logging.apache.org/log4j/2.x/manual/configuration.html#Properties) to
255+
configure its logging. The file is located in a Flux installation at `./conf/log4j2.properties`. You are free to
256+
customize this file to meet your needs for logging.

flux-cli/src/dist/conf/log4j2.properties

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ rootLogger = INFO, STDOUT
55
appender.console.name = STDOUT
66
appender.console.type = Console
77
appender.console.layout.type = PatternLayout
8-
#appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%tn] %p %c: %m%n
9-
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %c: %m%n
8+
#appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss} [%tn] %p %c: %m%n
9+
appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss} %c: %m%n
1010

1111
logger.marklogicclient.name=com.marklogic.client
1212
logger.marklogicclient.level=WARN

flux-cli/src/main/java/com/marklogic/flux/cli/Main.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,7 @@
33
*/
44
package com.marklogic.flux.cli;
55

6-
import com.marklogic.flux.impl.AbstractCommand;
7-
import com.marklogic.flux.impl.Command;
8-
import com.marklogic.flux.impl.SparkUtil;
9-
import com.marklogic.flux.impl.VersionCommand;
6+
import com.marklogic.flux.impl.*;
107
import com.marklogic.flux.impl.copy.CopyCommand;
118
import com.marklogic.flux.impl.custom.CustomExportDocumentsCommand;
129
import com.marklogic.flux.impl.custom.CustomExportRowsCommand;
@@ -112,11 +109,15 @@ private int executeCommand(CommandLine.ParseResult parseResult) {
112109

113110
protected SparkSession buildSparkSession(Command selectedCommand) {
114111
String masterUrl = null;
112+
boolean showProgressBar = false;
115113
if (selectedCommand instanceof AbstractCommand) {
116-
masterUrl = ((AbstractCommand) selectedCommand).getCommonParams().getSparkMasterUrl();
114+
CommonParams commonParams = ((AbstractCommand) selectedCommand).getCommonParams();
115+
masterUrl = commonParams.getSparkMasterUrl();
116+
showProgressBar = commonParams.isSparkShowProgressBar();
117117
}
118+
118119
return masterUrl != null && masterUrl.trim().length() > 0 ?
119-
SparkUtil.buildSparkSession(masterUrl) :
120+
SparkUtil.buildSparkSession(masterUrl, showProgressBar) :
120121
SparkUtil.buildSparkSession();
121122
}
122123
}

flux-cli/src/main/java/com/marklogic/flux/impl/CommonParams.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,18 @@ public class CommonParams {
3535
@CommandLine.Option(names = "--stacktrace", description = "Print the stacktrace when a command fails.")
3636
private boolean showStacktrace;
3737

38-
// Hidden for now since showing it for every command in its "help" seems confusing for most users that will likely
39-
// never need to know about this.
4038
@CommandLine.Option(
4139
names = "--spark-master-url",
4240
description = "Specify the Spark master URL for configuring the local Spark cluster created by Flux."
4341
)
4442
private String sparkMasterUrl = "local[*]";
4543

44+
@CommandLine.Option(
45+
names = "--spark-show-progress-bar",
46+
description = "Show the Spark progress bar in the console, which will periodically log Spark stage progress."
47+
)
48+
private boolean sparkShowProgressBar;
49+
4650
@CommandLine.Option(
4751
names = "-C",
4852
description = "Specify any key and value to be added to the Spark runtime configuration; %ne.g. -Cspark.logConf=true."
@@ -82,4 +86,8 @@ public Map<String, String> getConfigParams() {
8286
public Preview getPreview() {
8387
return preview;
8488
}
89+
90+
public boolean isSparkShowProgressBar() {
91+
return sparkShowProgressBar;
92+
}
8593
}

flux-cli/src/main/java/com/marklogic/flux/impl/SparkUtil.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,24 @@ private SparkUtil() {
1616
}
1717

1818
public static SparkSession buildSparkSession() {
19-
return buildSparkSession("local[*]");
19+
return buildSparkSession("local[*]", false);
2020
}
2121

22-
public static SparkSession buildSparkSession(String masterUrl) {
23-
return SparkSession.builder()
22+
public static SparkSession buildSparkSession(String masterUrl, boolean showConsoleProgress) {
23+
SparkSession.Builder builder = SparkSession.builder()
2424
.master(masterUrl)
25-
.config("spark.ui.showConsoleProgress", "true")
26-
.config("spark.sql.session.timeZone", "UTC")
27-
.getOrCreate();
25+
.config("spark.sql.session.timeZone", "UTC");
26+
27+
if (showConsoleProgress) {
28+
// The main value of this is in showing pixels moving. The info provided by Spark - about tasks and stages -
29+
// is usually going to be too low-level for a typical user. But because the Spark progress console shows
30+
// an ASCII spinner and some things being updated, it at least gives the user comfort that Flux is not
31+
// frozen. Note as well that for import, copy, and reprocess commands, the --log-progress feature provides
32+
// much more useful progress status.
33+
builder = builder.config("spark.ui.showConsoleProgress", "true");
34+
}
35+
36+
return builder.getOrCreate();
2837
}
2938

3039
/**
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Copyright © 2024 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
3+
*/
4+
package com.marklogic.flux.impl;
5+
6+
import com.marklogic.flux.AbstractTest;
7+
import org.junit.jupiter.api.Test;
8+
9+
class ShowSparkProgressBarTest extends AbstractTest {
10+
11+
@Test
12+
void test() {
13+
run(
14+
"import-files",
15+
"--path", "src/test/resources/mixed-files",
16+
"--filter", "hello*",
17+
"--connection-string", makeConnectionString(),
18+
"--permissions", DEFAULT_PERMISSIONS,
19+
"--collections", "test-files",
20+
"--spark-show-progress-bar"
21+
);
22+
23+
assertCollectionSize(
24+
"This test ensures that include --spark-show-progress-bar doesn't cause any errors.",
25+
"test-files", 4
26+
);
27+
}
28+
}

0 commit comments

Comments
 (0)