improve add_partitions performance
wecharyu committed Nov 7, 2024
1 parent 799b5cf commit 2e243ac
Showing 2 changed files with 35 additions and 22 deletions.
DirectSqlInsertPart.java
@@ -355,12 +355,12 @@ public void execute(String batchQueryText, int batchRowCount) throws MetaException
     insertInBatch("\"PARTITION_KEY_VALS\"", columns, columnCount, rowCount, batchExecutionContext);
   }
 
-  private void insertColumnDescriptorInBatch(Map<Long, MColumnDescriptor> cdIdToColumnDescriptor) throws MetaException {
-    int rowCount = cdIdToColumnDescriptor.size();
+  private void insertColumnDescriptorInBatch(Map<MColumnDescriptor, Long> columnDescriptorToId) throws MetaException {
+    int rowCount = columnDescriptorToId.size();
     String columns = "(\"CD_ID\")";
     int columnCount = 1;
     BatchExecutionContext batchExecutionContext = new BatchExecutionContext() {
-      final Iterator<Long> it = cdIdToColumnDescriptor.keySet().iterator();
+      final Iterator<Long> it = columnDescriptorToId.values().iterator();
       @Override
       public void execute(String batchQueryText, int batchRowCount) throws MetaException {
         Object[] params = new Object[batchRowCount * columnCount];
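The heart of the change is visible in this hunk: the map is now keyed by MColumnDescriptor instead of by CD_ID, so descriptors that compare equal (or are the same object) collapse to a single CDS row instead of one row per partition. A standalone sketch of the deduplication idea, using a stand-in record for MColumnDescriptor (all names below are hypothetical, not from the commit):

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

// Sketch: keying the map by the descriptor dedupes CD_ID allocation.
public class CdDedupSketch {
  // Stand-in for MColumnDescriptor; equal signatures compare equal.
  record ColumnDescriptor(String colsSignature) {}

  public static void main(String[] args) {
    AtomicLong idGen = new AtomicLong(1);
    Map<ColumnDescriptor, Long> cdToId = new HashMap<>();

    ColumnDescriptor tableCd = new ColumnDescriptor("a:int,b:string");
    for (int part = 0; part < 3; part++) {
      // Partitions that reuse the table's descriptor all get the same id.
      long cdId = cdToId.computeIfAbsent(tableCd, cd -> idGen.getAndIncrement());
      System.out.println("partition " + part + " -> CD_ID " + cdId);
    }
    System.out.println("rows inserted into CDS: " + cdToId.size()); // 1, not 3
  }
}

In the real code the key is the MColumnDescriptor object itself; partitions of one table typically share that object, so even identity-based equality is enough to deduplicate.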
@@ -374,9 +374,9 @@ public void execute(String batchQueryText, int batchRowCount) throws MetaException
     insertInBatch("\"CDS\"", columns, columnCount, rowCount, batchExecutionContext);
   }
 
-  private void insertColumnV2InBatch(Map<Long, MColumnDescriptor> cdIdToColumnDescriptor) throws MetaException {
+  private void insertColumnV2InBatch(Map<MColumnDescriptor, Long> columnDescriptorToId) throws MetaException {
     int rowCount = 0;
-    for (MColumnDescriptor cd : cdIdToColumnDescriptor.values()) {
+    for (MColumnDescriptor cd : columnDescriptorToId.keySet()) {
       rowCount += cd.getCols().size();
     }
     if (rowCount == 0) {
@@ -386,9 +386,9 @@ private void insertColumnV2InBatch(Map<Long, MColumnDescriptor> cdIdToColumnDescriptor
     int columnCount = 5;
     BatchExecutionContext batchExecutionContext = new BatchExecutionContext() {
       int colIndex = 0;
-      final Iterator<Map.Entry<Long, MColumnDescriptor>> cdIt = cdIdToColumnDescriptor.entrySet().iterator();
-      Map.Entry<Long, MColumnDescriptor> cdEntry = cdIt.next();
-      Iterator<MFieldSchema> it = cdEntry.getValue().getCols().iterator();
+      final Iterator<Map.Entry<MColumnDescriptor, Long>> cdIt = columnDescriptorToId.entrySet().iterator();
+      Map.Entry<MColumnDescriptor, Long> cdEntry = cdIt.next();
+      Iterator<MFieldSchema> it = cdEntry.getKey().getCols().iterator();
       @Override
       public void execute(String batchQueryText, int batchRowCount) throws MetaException {
         Object[] params = new Object[batchRowCount * columnCount];
@@ -407,7 +407,7 @@ public void execute(String batchQueryText, int batchRowCount) throws MetaException
           if (index < batchRowCount) {
             colIndex = 0;
             cdEntry = cdIt.next(); // cdIt.next() cannot be null since it is within the row count
-            it = cdEntry.getValue().getCols().iterator();
+            it = cdEntry.getKey().getCols().iterator();
           }
         } while (index < batchRowCount);
         executeQuery(batchQueryText, params);
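insertColumnV2InBatch flattens a nested structure, one row per column of each descriptor, while the rows are emitted in fixed-size batches, so the inner column iterator must be carried across execute() calls; that is why cdEntry and it are fields of the anonymous BatchExecutionContext. A standalone sketch of the carried-iterator batching pattern (stand-in types and a hypothetical batch size; the real slicing lives in insertInBatch, which this diff does not show):

import java.util.Iterator;
import java.util.List;
import java.util.Map;

// Sketch: flatten per-descriptor column lists into fixed-size batches,
// carrying the inner iterator across batch boundaries.
public class NestedBatchSketch {
  public static void main(String[] args) {
    Map<String, List<String>> cdToCols = Map.of(
        "cd1", List.of("a", "b", "c"),
        "cd2", List.of("x", "y"));
    int batchSize = 2; // hypothetical; the real value comes from configuration

    Iterator<Map.Entry<String, List<String>>> cdIt = cdToCols.entrySet().iterator();
    Map.Entry<String, List<String>> cdEntry = cdIt.next();
    Iterator<String> colIt = cdEntry.getValue().iterator();

    int remaining = cdToCols.values().stream().mapToInt(List::size).sum();
    while (remaining > 0) {
      int batchRowCount = Math.min(batchSize, remaining);
      for (int index = 0; index < batchRowCount; index++) {
        while (!colIt.hasNext()) { // descriptor exhausted: move to the next one
          cdEntry = cdIt.next();
          colIt = cdEntry.getValue().iterator();
        }
        System.out.println("batch row: " + cdEntry.getKey() + "." + colIt.next());
      }
      remaining -= batchRowCount;
      System.out.println("-- executeQuery for this batch --");
    }
  }
}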
@@ -726,9 +726,9 @@ public void execute(String batchQueryText, int batchRowCount) throws MetaException
    * @throws MetaException
    */
   public void addPartitions(List<MPartition> parts, List<List<MPartitionPrivilege>> partPrivilegesList,
-      List<List<MPartitionColumnPrivilege>> partColPrivilegesList) throws MetaException {
+      List<List<MPartitionColumnPrivilege>> partColPrivilegesList, Map<MColumnDescriptor, Long> cdToCdId) throws MetaException {
     Map<Long, MSerDeInfo> serdeIdToSerDeInfo = new HashMap<>();
-    Map<Long, MColumnDescriptor> cdIdToColumnDescriptor = new HashMap<>();
+    // Map<Long, MColumnDescriptor> cdIdToColumnDescriptor = new HashMap<>();
     Map<Long, MStorageDescriptor> sdIdToStorageDescriptor = new HashMap<>();
     Map<Long, MPartition> partIdToPartition = new HashMap<>();
     Map<Long, MPartitionPrivilege> partGrantIdToPrivilege = new HashMap<>();
@@ -753,14 +753,22 @@ public void addPartitions(List<MPartition> parts, List<List<MPartitionPrivilege>
       Long serDeId = getDataStoreId(MSerDeInfo.class);
       serdeIdToSerDeInfo.put(serDeId, sd.getSerDeInfo());
 
-      Long cdId;
-      DatastoreId storeId = (DatastoreId) pm.getObjectId(sd.getCD());
-      if (storeId == null) {
-        cdId = getDataStoreId(MColumnDescriptor.class);
-        cdIdToColumnDescriptor.put(cdId, sd.getCD());
-      } else {
-        cdId = (Long) storeId.getKeyAsObject();
-      }
+      // Long cdId;
+      // DatastoreId storeId = (DatastoreId) pm.getObjectId(sd.getCD());
+      // if (storeId == null) {
+      //   cdId = getDataStoreId(MColumnDescriptor.class);
+      //   cdIdToColumnDescriptor.put(cdId, sd.getCD());
+      // } else {
+      //   cdId = (Long) storeId.getKeyAsObject();
+      // }
+      Long cdId = cdToCdId.computeIfAbsent(sd.getCD(), k -> {
+        try {
+          return getDataStoreId(MColumnDescriptor.class);
+        } catch (MetaException me) {
+          throw new RuntimeException(me);
+        }
+      });
+
 
       Long sdId = getDataStoreId(MStorageDescriptor.class);
       sdIdToStorageDescriptor.put(sdId, sd);
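The try/catch inside the lambda is forced by the API: Map.computeIfAbsent takes a java.util.function.Function, whose apply method cannot throw checked exceptions, so the checked MetaException has to be wrapped in a RuntimeException. A standalone sketch of the wrap-and-unwrap pattern (the unwrapping half is an assumption for illustration; this commit only shows the wrapping half):

import java.util.HashMap;
import java.util.Map;

public class ComputeIfAbsentChecked {
  // Stand-in for the Hive MetaException type.
  static class MetaException extends Exception {
    MetaException(String msg) { super(msg); }
  }

  // Hypothetical id source that may fail with a checked exception.
  static long getDataStoreId() throws MetaException {
    return 42L;
  }

  static Long idFor(Map<String, Long> cache, String key) throws MetaException {
    try {
      return cache.computeIfAbsent(key, k -> {
        try {
          return getDataStoreId();
        } catch (MetaException me) {
          throw new RuntimeException(me); // smuggle past Function's signature
        }
      });
    } catch (RuntimeException re) {
      if (re.getCause() instanceof MetaException) {
        throw (MetaException) re.getCause(); // restore the checked exception
      }
      throw re;
    }
  }

  public static void main(String[] args) throws MetaException {
    Map<String, Long> cache = new HashMap<>();
    System.out.println(idFor(cache, "cd")); // computes: 42
    System.out.println(idFor(cache, "cd")); // cached: 42
  }
}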
@@ -807,8 +815,10 @@ public void addPartitions(List<MPartition> parts, List<List<MPartitionPrivilege>
     }
     insertSerdeInBatch(serdeIdToSerDeInfo);
     insertSerdeParamInBatch(serdeIdToSerDeInfo);
-    insertColumnDescriptorInBatch(cdIdToColumnDescriptor);
-    insertColumnV2InBatch(cdIdToColumnDescriptor);
+    // insertColumnDescriptorInBatch(cdIdToColumnDescriptor);
+    // insertColumnV2InBatch(cdIdToColumnDescriptor);
+    insertColumnDescriptorInBatch(cdToCdId);
+    insertColumnV2InBatch(cdToCdId);
     insertStorageDescriptorInBatch(sdIdToStorageDescriptor, sdIdToSerdeId, sdIdToCdId);
     insertStorageDescriptorParamInBatch(sdIdToStorageDescriptor);
     insertBucketColInBatch(sdIdToStorageDescriptor);
MetaStoreDirectSql.java
@@ -91,6 +91,7 @@
 import org.apache.hadoop.hive.metastore.client.builder.GetPartitionsArgs;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
+import org.apache.hadoop.hive.metastore.model.MColumnDescriptor;
 import org.apache.hadoop.hive.metastore.model.MConstraint;
 import org.apache.hadoop.hive.metastore.model.MCreationMetadata;
 import org.apache.hadoop.hive.metastore.model.MDatabase;
@@ -534,7 +535,9 @@ public List<String> getMaterializedViewsForRewriting(String dbName) throws MetaException
    */
   public void addPartitions(List<MPartition> parts, List<List<MPartitionPrivilege>> partPrivilegesList,
       List<List<MPartitionColumnPrivilege>> partColPrivilegesList) throws MetaException {
-    directSqlInsertPart.addPartitions(parts, partPrivilegesList, partColPrivilegesList);
+    Map<MColumnDescriptor, Long> cdToCdId = new HashMap<>();
+    // put the table's CD here, which partitions could reuse
+    directSqlInsertPart.addPartitions(parts, partPrivilegesList, partColPrivilegesList, cdToCdId);
   }
 
   /**
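The new comment marks where the caller could seed the map: when the table's column descriptor is already persisted, partitions that reuse it should resolve to the existing CD_ID rather than allocate a fresh one. A hedged sketch of what that seeding might look like (hypothetical; pm, the getTable() navigation, and the null checks are assumptions, not code from this commit):

// Hypothetical seeding of cdToCdId with the table's already-persisted CD,
// mirroring the pm.getObjectId(...) lookup this commit removed from the
// per-partition loop.
Map<MColumnDescriptor, Long> cdToCdId = new HashMap<>();
if (!parts.isEmpty() && parts.get(0).getTable() != null) {
  MColumnDescriptor tableCd = parts.get(0).getTable().getSd().getCD();
  DatastoreId storeId = (DatastoreId) pm.getObjectId(tableCd); // pm assumed in scope
  if (storeId != null) {
    cdToCdId.put(tableCd, (Long) storeId.getKeyAsObject());
  }
}
directSqlInsertPart.addPartitions(parts, partPrivilegesList, partColPrivilegesList, cdToCdId);

Note that as committed, insertColumnDescriptorInBatch inserts a CDS row for every map entry, so pre-seeding with an already-persisted CD would also require the batch-insert side to skip such entries; the commit leaves the map empty for now.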
