Skip to content

Commit 8164078

Browse files
Add Control file module files and validation (#2445)
Co-authored-by: Peckstadt Yves <[email protected]>
1 parent 8aafbb0 commit 8164078

File tree

5 files changed

+878
-0
lines changed

5 files changed

+878
-0
lines changed

core/src/main/java/com/scalar/db/common/error/CoreError.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,44 @@ public enum CoreError implements ScalarDbError {
764764
Category.USER_ERROR, "0165", "Missing namespace or table: %s, %s", "", ""),
765765
DATA_LOADER_TABLE_METADATA_RETRIEVAL_FAILED(
766766
Category.USER_ERROR, "0166", "Failed to retrieve table metadata. Details: %s", "", ""),
767+
DATA_LOADER_DUPLICATE_DATA_MAPPINGS(
768+
Category.USER_ERROR,
769+
"0167",
770+
"Duplicate data mappings found for table '%s' in the control file",
771+
"",
772+
""),
773+
DATA_LOADER_MISSING_COLUMN_MAPPING(
774+
Category.USER_ERROR,
775+
"0168",
776+
"No mapping found for column '%s' in table '%s' in the control file. Control file validation set at 'FULL'. All columns need to be mapped.",
777+
"",
778+
""),
779+
DATA_LOADER_CONTROL_FILE_MISSING_DATA_MAPPINGS(
780+
Category.USER_ERROR, "0169", "The control file is missing data mappings", "", ""),
781+
DATA_LOADER_TARGET_COLUMN_NOT_FOUND(
782+
Category.USER_ERROR,
783+
"0170",
784+
"The target column '%s' for source field '%s' could not be found in table '%s'",
785+
"",
786+
""),
787+
DATA_LOADER_MISSING_PARTITION_KEY(
788+
Category.USER_ERROR,
789+
"0171",
790+
"The required partition key '%s' is missing in the control file mapping for table '%s'",
791+
"",
792+
""),
793+
DATA_LOADER_MISSING_CLUSTERING_KEY(
794+
Category.USER_ERROR,
795+
"0172",
796+
"The required clustering key '%s' is missing in the control file mapping for table '%s'",
797+
"",
798+
""),
799+
DATA_LOADER_MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND(
800+
Category.USER_ERROR,
801+
"0173",
802+
"Duplicated data mappings found for column '%s' in table '%s'",
803+
"",
804+
""),
767805

768806
//
769807
// Errors for the concurrency error category
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package com.scalar.db.dataloader.core.dataimport.controlfile;
2+
3+
import com.fasterxml.jackson.annotation.JsonCreator;
4+
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import java.util.ArrayList;
6+
import java.util.List;
7+
import lombok.Getter;
8+
import lombok.Setter;
9+
10+
/**
11+
* Represents a control file that holds control file tables which contains the column mappings that
12+
* maps a source file column to the actual database table column.
13+
*/
14+
@Getter
15+
@Setter
16+
public class ControlFile {
17+
18+
/**
19+
* A list of {@link ControlFileTable} objects representing the tables defined in the control file.
20+
*/
21+
@JsonProperty("tables")
22+
private final List<ControlFileTable> tables;
23+
24+
/** Default constructor that initializes an empty list of tables. */
25+
public ControlFile() {
26+
this.tables = new ArrayList<>();
27+
}
28+
29+
/**
30+
* Constructs a {@code ControlFile} with the specified list of tables.
31+
*
32+
* @param tables the list of {@link ControlFileTable} objects to initialize the control file with
33+
*/
34+
@JsonCreator
35+
public ControlFile(@JsonProperty("tables") List<ControlFileTable> tables) {
36+
this.tables = tables;
37+
}
38+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package com.scalar.db.dataloader.core.dataimport.controlfile;
2+
3+
/** Exception thrown when the validation of a control file fails. */
public class ControlFileValidationException extends Exception {

  /**
   * Class constructor
   *
   * @param message error message describing why the control file is invalid
   */
  public ControlFileValidationException(String message) {
    super(message);
  }

  /**
   * Class constructor that preserves the underlying cause of the validation failure
   *
   * @param message error message describing why the control file is invalid
   * @param cause the exception that caused the validation to fail, may be {@code null}
   */
  public ControlFileValidationException(String message, Throwable cause) {
    super(message, cause);
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
package com.scalar.db.dataloader.core.dataimport.controlfile;
2+
3+
import com.scalar.db.api.TableMetadata;
4+
import com.scalar.db.common.error.CoreError;
5+
import com.scalar.db.dataloader.core.util.RuntimeUtil;
6+
import com.scalar.db.dataloader.core.util.TableMetadataUtil;
7+
import java.util.HashSet;
8+
import java.util.LinkedHashSet;
9+
import java.util.Map;
10+
import java.util.Set;
11+
12+
/** Class to validate a control file */
13+
public class ControlFileValidator {
14+
15+
/**
16+
* Validate a control file
17+
*
18+
* @param controlFile Control file instance
19+
* @param controlFileValidationMode Defines the strictness of the control file validation
20+
* @param tableMetadataMap Metadata for one or more ScalarDB tables
21+
* @throws ControlFileValidationException when the control file is invalid
22+
*/
23+
public static void validate(
24+
ControlFile controlFile,
25+
ControlFileValidationLevel controlFileValidationMode,
26+
Map<String, TableMetadata> tableMetadataMap)
27+
throws ControlFileValidationException {
28+
29+
// Method argument null check
30+
RuntimeUtil.checkNotNull(controlFile, controlFileValidationMode, tableMetadataMap);
31+
32+
// Make sure the control file is not empty
33+
checkEmptyMappings(controlFile);
34+
35+
// Table metadata existence and target column validation
36+
Set<String> uniqueTables = new HashSet<>();
37+
for (ControlFileTable controlFileTable : controlFile.getTables()) {
38+
String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable);
39+
40+
// Make sure that multiple table mappings for one table do not exist
41+
if (uniqueTables.contains(lookupKey)) {
42+
throw new ControlFileValidationException(
43+
CoreError.DATA_LOADER_DUPLICATE_DATA_MAPPINGS.buildMessage(lookupKey));
44+
}
45+
uniqueTables.add(lookupKey);
46+
47+
// Make sure no column is mapped multiple times
48+
Set<String> mappedTargetColumns = getTargetColumnSet(controlFileTable);
49+
50+
// Make sure table metadata is provided for each table mentioned in the data mappings
51+
checkMultiTableMetadata(tableMetadataMap, controlFileTable);
52+
53+
TableMetadata tableMetadata = tableMetadataMap.get(lookupKey);
54+
55+
// Make sure the specified target columns in the mappings actually exist
56+
checkIfTargetColumnExist(tableMetadata, controlFileTable);
57+
58+
// Make sure all table columns are mapped
59+
if (controlFileValidationMode == ControlFileValidationLevel.FULL) {
60+
checkIfAllColumnsAreMapped(tableMetadata, mappedTargetColumns, controlFileTable);
61+
continue;
62+
}
63+
64+
// Make sure all keys (partition keys and clustering keys) are mapped
65+
if (controlFileValidationMode == ControlFileValidationLevel.KEYS) {
66+
checkPartitionKeys(tableMetadata, mappedTargetColumns, controlFileTable);
67+
checkClusteringKeys(tableMetadata, mappedTargetColumns, controlFileTable);
68+
}
69+
}
70+
}
71+
72+
/**
73+
* Check that all table columns are mapped in the control file. Ran only when the control file
74+
* validation mode is set to 'FULL'
75+
*
76+
* @param tableMetadata Metadata for one ScalarDB table
77+
* @param mappedTargetColumns All target columns that are mapped in the control file
78+
* @param controlFileTable Control file entry for one ScalarDB table
79+
* @throws ControlFileValidationException when there is a column that is not mapped in the control
80+
* file
81+
*/
82+
private static void checkIfAllColumnsAreMapped(
83+
TableMetadata tableMetadata,
84+
Set<String> mappedTargetColumns,
85+
ControlFileTable controlFileTable)
86+
throws ControlFileValidationException {
87+
LinkedHashSet<String> columnNames = tableMetadata.getColumnNames();
88+
for (String columnName : columnNames) {
89+
if (!mappedTargetColumns.contains(columnName)) {
90+
throw new ControlFileValidationException(
91+
CoreError.DATA_LOADER_MISSING_COLUMN_MAPPING.buildMessage(
92+
columnName, TableMetadataUtil.getTableLookupKey(controlFileTable)));
93+
}
94+
}
95+
}
96+
97+
/**
98+
* Check that the control file has mappings for at least one table
99+
*
100+
* @param controlFile Control file instance
101+
* @throws ControlFileValidationException when the control file has no mappings for any table
102+
*/
103+
private static void checkEmptyMappings(ControlFile controlFile)
104+
throws ControlFileValidationException {
105+
// Make sure data mapping for at least one table is provided
106+
if (controlFile.getTables().isEmpty()) {
107+
throw new ControlFileValidationException(
108+
CoreError.DATA_LOADER_CONTROL_FILE_MISSING_DATA_MAPPINGS.buildMessage());
109+
}
110+
}
111+
112+
/**
113+
* Check that metadata is provided for each table that is mapped in the control file. If the table
114+
* metadata is missing this probably means the namespace and table combination does not exist.
115+
*
116+
* @param tableMetadataMap Metadata for one or more ScalarDB tables
117+
* @param controlFileTable Control file entry for one ScalarDB table
118+
* @throws ControlFileValidationException when metadata for a mapped table is missing
119+
*/
120+
private static void checkMultiTableMetadata(
121+
Map<String, TableMetadata> tableMetadataMap, ControlFileTable controlFileTable)
122+
throws ControlFileValidationException {
123+
// Make sure table metadata is available for each table data mapping
124+
String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable);
125+
if (!tableMetadataMap.containsKey(lookupKey)) {
126+
throw new ControlFileValidationException(
127+
CoreError.DATA_LOADER_MISSING_NAMESPACE_OR_TABLE.buildMessage(
128+
controlFileTable.getNamespace(), controlFileTable.getTable()));
129+
}
130+
}
131+
132+
/**
133+
* Check that the mapped target column exists in the provided table metadata.
134+
*
135+
* @param tableMetadata Metadata for the table
136+
* @param controlFileTable Control file entry for one ScalarDB table
137+
* @throws ControlFileValidationException when the target column does not exist
138+
*/
139+
private static void checkIfTargetColumnExist(
140+
TableMetadata tableMetadata, ControlFileTable controlFileTable)
141+
throws ControlFileValidationException {
142+
143+
String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable);
144+
LinkedHashSet<String> columnNames = tableMetadata.getColumnNames();
145+
146+
for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) {
147+
// Make sure the target fields are found in the table metadata
148+
if (!columnNames.contains(mapping.getTargetColumn())) {
149+
throw new ControlFileValidationException(
150+
CoreError.DATA_LOADER_TARGET_COLUMN_NOT_FOUND.buildMessage(
151+
mapping.getTargetColumn(), mapping.getSourceField(), lookupKey));
152+
}
153+
}
154+
}
155+
156+
/**
157+
* Check that the required partition keys are mapped in the control file. Ran only for control
158+
* file validation mode KEYS and FULL.
159+
*
160+
* @param tableMetadata Metadata for one ScalarDB table
161+
* @param mappedTargetColumns Set of target columns that are mapped in the control file
162+
* @param controlFileTable Control file entry for one ScalarDB table
163+
* @throws ControlFileValidationException when a partition key is not mapped
164+
*/
165+
private static void checkPartitionKeys(
166+
TableMetadata tableMetadata,
167+
Set<String> mappedTargetColumns,
168+
ControlFileTable controlFileTable)
169+
throws ControlFileValidationException {
170+
LinkedHashSet<String> partitionKeyNames = tableMetadata.getPartitionKeyNames();
171+
for (String partitionKeyName : partitionKeyNames) {
172+
if (!mappedTargetColumns.contains(partitionKeyName)) {
173+
throw new ControlFileValidationException(
174+
CoreError.DATA_LOADER_MISSING_PARTITION_KEY.buildMessage(
175+
partitionKeyName, TableMetadataUtil.getTableLookupKey(controlFileTable)));
176+
}
177+
}
178+
}
179+
180+
/**
181+
* Check that the required clustering keys are mapped in the control file. Ran only for control
182+
* file validation mode KEYS and FULL.
183+
*
184+
* @param tableMetadata Metadata for one ScalarDB table
185+
* @param mappedTargetColumns Set of target columns that are mapped in the control file
186+
* @param controlFileTable Control file entry for one ScalarDB table
187+
* @throws ControlFileValidationException when a clustering key is not mapped
188+
*/
189+
private static void checkClusteringKeys(
190+
TableMetadata tableMetadata,
191+
Set<String> mappedTargetColumns,
192+
ControlFileTable controlFileTable)
193+
throws ControlFileValidationException {
194+
LinkedHashSet<String> clusteringKeyNames = tableMetadata.getClusteringKeyNames();
195+
for (String clusteringKeyName : clusteringKeyNames) {
196+
if (!mappedTargetColumns.contains(clusteringKeyName)) {
197+
throw new ControlFileValidationException(
198+
CoreError.DATA_LOADER_MISSING_CLUSTERING_KEY.buildMessage(
199+
clusteringKeyName, TableMetadataUtil.getTableLookupKey(controlFileTable)));
200+
}
201+
}
202+
}
203+
204+
/**
205+
* Check that a control file table mapping does not contain duplicate mappings for the same target
206+
* column and creates a set of unique mappings
207+
*
208+
* @param controlFileTable Control file entry for one ScalarDB table
209+
* @return Set of uniquely mapped target columns
210+
* @throws ControlFileValidationException when a duplicate mapping is found
211+
*/
212+
private static Set<String> getTargetColumnSet(ControlFileTable controlFileTable)
213+
throws ControlFileValidationException {
214+
Set<String> mappedTargetColumns = new HashSet<>();
215+
for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) {
216+
if (!mappedTargetColumns.add(mapping.getTargetColumn())) {
217+
throw new ControlFileValidationException(
218+
CoreError.DATA_LOADER_MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND.buildMessage(
219+
mapping.getTargetColumn(), TableMetadataUtil.getTableLookupKey(controlFileTable)));
220+
}
221+
}
222+
return mappedTargetColumns;
223+
}
224+
}

0 commit comments

Comments
 (0)