-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[vpj] Add Spark as a compute engine for DataWriter job (#851)
This PR adds Spark as an alternate compute engine to run the Data Writer task. Currently, it does not handle KIF, and the code is written so that it falls back to the MapReduce implementation when KIF is used. This will be addressed in a follow-up PR.

To use the new Spark-based flow, set the following VPJ config: "data.writer.compute.job.class = com.linkedin.venice.hadoop.spark.datawriter.jobs.DataWriterSparkJob"

VPJ has the following new configs:
1. "venice.spark.cluster": This will configure the spark.master config.
   * Refer to https://spark.apache.org/docs/latest/submitting-applications.html#master-urls for the supported values.
2. "venice.spark.session.conf.*"
   * Configs with this prefix will be set when building the Spark session.
   * These will get applied to all Spark jobs that get triggered as a part of VPJ.
   * It can be used to configure arbitrary cluster properties.
3. "spark.data.writer.conf.*"
   * Configs with this prefix will be set when building the data writer Spark job and passed as job properties.
   * These will only get applied on the DataWriter Spark jobs.
   * It is useful when there are custom input formats which need additional configs to be able to read the data.
- Loading branch information
1 parent
3eff563
commit d14722a
Showing
53 changed files
with
2,947 additions
and
123 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
...n/java/com/linkedin/venice/hadoop/input/recordreader/avro/IdentityVeniceRecordReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package com.linkedin.venice.hadoop.input.recordreader.avro; | ||
|
||
import com.linkedin.venice.exceptions.VeniceUnsupportedOperationException; | ||
import com.linkedin.venice.hadoop.input.recordreader.AbstractVeniceRecordReader; | ||
import com.linkedin.venice.utils.ByteUtils; | ||
import java.nio.ByteBuffer; | ||
import org.apache.avro.Schema; | ||
|
||
|
||
/** | ||
* A record reader that returns the input key and value as is. | ||
*/ | ||
public class IdentityVeniceRecordReader extends AbstractVeniceRecordReader<ByteBuffer, ByteBuffer> { | ||
private static final IdentityVeniceRecordReader INSTANCE = new IdentityVeniceRecordReader(); | ||
|
||
private IdentityVeniceRecordReader() { | ||
final Schema BYTES_SCHEMA = Schema.create(Schema.Type.BYTES); | ||
configure(BYTES_SCHEMA, BYTES_SCHEMA); | ||
} | ||
|
||
public static IdentityVeniceRecordReader getInstance() { | ||
return INSTANCE; | ||
} | ||
|
||
@Override | ||
public Object getAvroKey(ByteBuffer keyBytes, ByteBuffer valueBytes) { | ||
throw new VeniceUnsupportedOperationException("getAvroKey in IdentityVeniceRecordReader"); | ||
} | ||
|
||
@Override | ||
public byte[] getKeyBytes(ByteBuffer keyBuffer, ByteBuffer valueBuffer) { | ||
return ByteUtils.extractByteArray(keyBuffer); | ||
} | ||
|
||
@Override | ||
public Object getAvroValue(ByteBuffer keyBytes, ByteBuffer valueBytes) { | ||
throw new VeniceUnsupportedOperationException("getAvroValue in IdentityVeniceRecordReader"); | ||
} | ||
|
||
@Override | ||
public byte[] getValueBytes(ByteBuffer keyBuffer, ByteBuffer valueBuffer) { | ||
return ByteUtils.extractByteArray(valueBuffer); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.