Skip to content

Commit b8f1b19

Browse files
Added support for a configurable delete policy. Fixes #52. (#53)
1 parent b85ffb6 commit b8f1b19

File tree

2 files changed

+78
-53
lines changed

2 files changed

+78
-53
lines changed

src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSourceConnectorConfig.java

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ public abstract class SpoolDirSourceConnectorConfig extends AbstractConfig {
7575
public static final String SCHEMA_GENERATION_VALUE_NAME_CONF = "schema.generation.value.name";
7676
public static final String SCHEMA_GENERATION_ENABLED_CONF = "schema.generation.enabled";
7777
public static final String METADATA_SCHEMA_NAME = "com.github.jcustenborder.kafka.connect.spooldir.Metadata";
78+
public static final String CLEANUP_POLICY_CONF = "cleanup.policy";
79+
public static final String CLEANUP_POLICY_DOC = "Determines how the connector should cleanup the " +
80+
"files that have been successfully processed. NONE leaves the files in place which could " +
81+
"cause them to be reprocessed if the connector is restarted. DELETE removes the file from the " +
82+
"filesystem. MOVE will move the file to a finished directory.";
83+
public static final String GROUP_FILESYSTEM = "File System";
84+
public static final String GROUP_SCHEMA_GENERATION = "Schema Generation";
85+
public static final String GROUP_SCHEMA = "Schema";
86+
public static final String GROUP_GENERAL = "General";
87+
public static final String GROUP_TIMESTAMP = "Timestamps";
7888
static final String TIMESTAMP_FIELD_DOC = "The field in the value schema that will contain the parsed timestamp for the record. " +
7989
"This field cannot be marked as optional and must be a " +
8090
"[Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html)";
@@ -137,11 +147,18 @@ public abstract class SpoolDirSourceConnectorConfig extends AbstractConfig {
137147
public final String schemaGenerationValueName;
138148
public boolean hasKeyMetadataField;
139149
public boolean hasvalueMetadataField;
140-
150+
public CleanupPolicy cleanupPolicy;
141151
public SpoolDirSourceConnectorConfig(final boolean isTask, ConfigDef configDef, Map<String, ?> settings) {
142152
super(configDef, settings);
143153
this.inputPath = ConfigUtils.getAbsoluteFile(this, INPUT_PATH_CONFIG);
144-
this.finishedPath = ConfigUtils.getAbsoluteFile(this, FINISHED_PATH_CONFIG);
154+
this.cleanupPolicy = ConfigUtils.getEnum(CleanupPolicy.class, this, CLEANUP_POLICY_CONF);
155+
156+
if (CleanupPolicy.MOVE == this.cleanupPolicy) {
157+
this.finishedPath = ConfigUtils.getAbsoluteFile(this, FINISHED_PATH_CONFIG);
158+
} else {
159+
this.finishedPath = null;
160+
}
161+
145162
this.errorPath = ConfigUtils.getAbsoluteFile(this, ERROR_PATH_CONFIG);
146163
this.haltOnError = this.getBoolean(HALT_ON_ERROR_CONF);
147164
this.minimumFileAgeMS = this.getLong(FILE_MINIMUM_AGE_MS_CONF);
@@ -271,9 +288,6 @@ public SpoolDirSourceConnectorConfig(final boolean isTask, ConfigDef configDef,
271288
this.inputFilenameFilter = new PatternFilenameFilter(inputPattern);
272289
}
273290

274-
public abstract boolean schemasRequired();
275-
276-
277291
private static final Field findMetadataField(Schema schema) {
278292
Field result = null;
279293
for (Field field : schema.fields()) {
@@ -287,12 +301,6 @@ private static final Field findMetadataField(Schema schema) {
287301
return result;
288302
}
289303

290-
public static final String GROUP_FILESYSTEM = "File System";
291-
public static final String GROUP_SCHEMA_GENERATION = "Schema Generation";
292-
public static final String GROUP_SCHEMA = "Schema";
293-
public static final String GROUP_GENERAL = "General";
294-
public static final String GROUP_TIMESTAMP = "Timestamps";
295-
296304
public static ConfigDef config() {
297305

298306
ConfigDef.Recommender schemaRecommender = new ConfigDef.Recommender() {
@@ -325,6 +333,23 @@ public boolean visible(String key, Map<String, Object> settings) {
325333
}
326334
};
327335

336+
ConfigDef.Recommender finishedPath = new ConfigDef.Recommender() {
337+
@Override
338+
public List<Object> validValues(String s, Map<String, Object> map) {
339+
return null;
340+
}
341+
342+
@Override
343+
public boolean visible(String s, Map<String, Object> map) {
344+
if (!FINISHED_PATH_CONFIG.equals(s)) {
345+
return true;
346+
}
347+
348+
final String cleanupPolicy = (String) map.get(CLEANUP_POLICY_CONF);
349+
return CleanupPolicy.MOVE.toString().equals(cleanupPolicy);
350+
}
351+
};
352+
328353

329354
return new ConfigDef()
330355

@@ -351,7 +376,15 @@ public boolean visible(String key, Map<String, Object> settings) {
351376
.group(GROUP_GENERAL)
352377
.build()
353378
)
354-
379+
.define(
380+
ConfigKeyBuilder.of(CLEANUP_POLICY_CONF, ConfigDef.Type.STRING)
381+
.documentation(CLEANUP_POLICY_DOC)
382+
.importance(ConfigDef.Importance.MEDIUM)
383+
.validator(ValidEnum.of(CleanupPolicy.class))
384+
.defaultValue(CleanupPolicy.MOVE.toString())
385+
.group(GROUP_FILESYSTEM)
386+
.build()
387+
)
355388
// Filesystem
356389
.define(
357390
ConfigKeyBuilder.of(INPUT_PATH_CONFIG, ConfigDef.Type.STRING)
@@ -364,7 +397,8 @@ public boolean visible(String key, Map<String, Object> settings) {
364397
ConfigKeyBuilder.of(FINISHED_PATH_CONFIG, ConfigDef.Type.STRING)
365398
.documentation(FINISHED_PATH_DOC)
366399
.importance(ConfigDef.Importance.HIGH)
367-
.validator(ValidDirectoryWritable.of())
400+
.defaultValue("")
401+
.recommender(finishedPath)
368402
.group(GROUP_FILESYSTEM)
369403
.build()
370404
).define(
@@ -503,6 +537,8 @@ public boolean visible(String key, Map<String, Object> settings) {
503537
);
504538
}
505539

540+
public abstract boolean schemasRequired();
541+
506542
Schema readSchema(final String key) {
507543
String schema = this.getString(key);
508544
Schema result;
@@ -525,4 +561,10 @@ public enum TimestampMode {
525561
FILE_TIME,
526562
PROCESS_TIME
527563
}
564+
565+
// How the connector disposes of an input file once it has been successfully processed.
public enum CleanupPolicy {
566+
// Leave the file in place; it could be reprocessed if the connector is restarted.
NONE,
567+
// Remove the file from the filesystem.
DELETE,
568+
// Move the file to the finished directory.
MOVE
569+
}
528570
}

src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSourceTask.java

Lines changed: 23 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -201,45 +201,6 @@ private void closeAndMoveToFinished(File outputDirectory, boolean errored) throw
201201
}
202202
}
203203

204-
205-
// File findNextInputFile() {
206-
// File[] input = this.config.inputPath.listFiles(this.config.inputFilenameFilter);
207-
// if (null == input || input.length == 0) {
208-
// log.debug("No files matching {} were found in {}", SpoolDirSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, this.config.inputPath);
209-
// return null;
210-
// }
211-
// List<File> files = new ArrayList<>(input.length);
212-
// for (File f : input) {
213-
// File processingFile = InputFileDequeue.processingFile(this.config, f);
214-
// log.trace("Checking for processing file: {}", processingFile);
215-
//
216-
// if (processingFile.exists()) {
217-
// log.debug("Skipping {} because processing file exists.", f);
218-
// continue;
219-
// }
220-
// files.add(f);
221-
// }
222-
//
223-
// File result = null;
224-
//
225-
// for (File file : files) {
226-
// long fileAgeMS = System.currentTimeMillis() - file.lastModified();
227-
//
228-
// if (fileAgeMS < 0L) {
229-
// log.warn("File {} has a date in the future.", file);
230-
// }
231-
//
232-
// if (this.config.minimumFileAgeMS > 0L && fileAgeMS < this.config.minimumFileAgeMS) {
233-
// log.debug("Skipping {} because it does not meet the minimum age.", file);
234-
// continue;
235-
// }
236-
// result = file;
237-
// break;
238-
// }
239-
//
240-
// return result;
241-
// }
242-
243204
static final Map<String, String> SUPPORTED_COMPRESSION_TYPES = ImmutableMap.of(
244205
"bz2", CompressorStreamFactory.BZIP2,
245206
"gz", CompressorStreamFactory.GZIP,
@@ -251,7 +212,14 @@ private void closeAndMoveToFinished(File outputDirectory, boolean errored) throw
251212
public List<SourceRecord> read() {
252213
try {
253214
if (!hasRecords) {
254-
closeAndMoveToFinished(this.config.finishedPath, false);
215+
switch (this.config.cleanupPolicy) {
216+
case MOVE:
217+
closeAndMoveToFinished(this.config.finishedPath, false);
218+
break;
219+
case DELETE:
220+
closeAndDelete();
221+
break;
222+
}
255223

256224
File nextFile = this.inputFileDequeue.poll();
257225
if (null == nextFile) {
@@ -315,6 +283,21 @@ public List<SourceRecord> read() {
315283
}
316284
}
317285

286+
private void closeAndDelete() throws IOException {
287+
if (null != inputStream) {
288+
log.info("Closing {}", this.inputFile);
289+
this.inputStream.close();
290+
this.inputStream = null;
291+
this.inputFile.delete();
292+
File processingFile = InputFileDequeue.processingFile(this.config.processingFileExtension, this.inputFile);
293+
if (processingFile.exists()) {
294+
log.info("Removing processing file {}", processingFile);
295+
processingFile.delete();
296+
}
297+
298+
}
299+
}
300+
318301
protected void addRecord(List<SourceRecord> records, Struct keyStruct, Struct valueStruct) {
319302
Map<String, ?> sourceOffset = ImmutableMap.of(
320303
"offset",

0 commit comments

Comments
 (0)