From ea8ab97e1b99301d469373084065a4c6bb60e810 Mon Sep 17 00:00:00 2001
From: wecharyu
Date: Fri, 1 Dec 2023 16:20:32 +0800
Subject: [PATCH] add dropPartitionsByNames api in client

---
 .../iceberg/mr/hive/HiveIcebergMetaHook.java  | 19 ++++++++
 .../apache/hadoop/hive/ql/metadata/Hive.java  | 29 ++++++++++--
 .../metadata/SessionHiveMetaStoreClient.java  | 45 ++++++++++++------
 .../hadoop/hive/ql/metadata/TestHive.java     | 40 +++++++++++++++-
 .../hadoop/hive/metastore/HiveMetaHook.java   | 13 +++++
 .../hive/metastore/HiveMetaStoreClient.java   | 44 +++++++++++++----
 .../hive/metastore/IMetaStoreClient.java      | 47 +++++++++++++++++++
 .../HiveMetaStoreClientPreCatalog.java        | 31 ++++++++++++
 8 files changed, 238 insertions(+), 30 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 9a108e5197262..5f9c4b8b0c1ca 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -43,9 +43,11 @@
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.PartitionDropOptions;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.DropPartitionsExpr;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.RequestPartsSpec;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
@@ -1085,6 +1087,23 @@ public void preDropPartitions(org.apache.hadoop.hive.metastore.api.Table hmsTabl
     context.putToProperties(HiveMetaStoreClient.SKIP_DROP_PARTITION, "true");
   }
 
+  @Override
+  public void preDropPartitions(org.apache.hadoop.hive.metastore.api.Table hmsTable,
+      EnvironmentContext context, RequestPartsSpec partsSpec) throws MetaException {
+    if (partsSpec.isSetExprs()) {
+      List<DropPartitionsExpr> exprs = partsSpec.getExprs();
+      List<org.apache.commons.lang3.tuple.Pair<Integer, byte[]>> partExprs = Lists.newArrayList();
+      for (DropPartitionsExpr expr : exprs) {
+        partExprs.add(
+            org.apache.commons.lang3.tuple.Pair.of(expr.getPartArchiveLevel(), expr.getExpr()));
+      }
+      preDropPartitions(hmsTable, context, partExprs);
+    } else if (partsSpec.isSetNames()) {
+      preTruncateTable(hmsTable, context, partsSpec.getNames());
+      context.putToProperties(HiveMetaStoreClient.SKIP_DROP_PARTITION, "true");
+    }
+  }
+
   private class PreAlterTableProperties {
     private String tableLocation;
     private String format;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 11b52c51a7305..419119fb344e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -113,8 +113,10 @@
 import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest;
 import org.apache.hadoop.hive.metastore.api.CompactionRequest;
 import org.apache.hadoop.hive.metastore.api.CreateTableRequest;
+import org.apache.hadoop.hive.metastore.api.DropPartitionsExpr;
 import org.apache.hadoop.hive.metastore.api.GetPartitionsByNamesRequest;
 import org.apache.hadoop.hive.metastore.api.GetTableRequest;
+import org.apache.hadoop.hive.metastore.api.RequestPartsSpec;
 import org.apache.hadoop.hive.metastore.api.SourceTable;
 import org.apache.hadoop.hive.metastore.api.UpdateTransactionalStatsRequest;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
@@ -3994,6 +3996,27 @@ public boolean dropPartition(String dbName, String tableName, List<String> parti
   public List<Partition> dropPartitions(String dbName, String tableName,
       List<Pair<Integer, byte[]>> partitionExpressions, PartitionDropOptions dropOptions)
       throws HiveException {
+    RequestPartsSpec rps = new RequestPartsSpec();
+    List<DropPartitionsExpr> exprs = new ArrayList<>(partitionExpressions.size());
+    for (Pair<Integer, byte[]> partExpr : partitionExpressions) {
+      DropPartitionsExpr dpe = new DropPartitionsExpr();
+      dpe.setExpr(partExpr.getRight());
+      dpe.setPartArchiveLevel(partExpr.getLeft());
+      exprs.add(dpe);
+    }
+    rps.setExprs(exprs);
+    return dropPartitions(dbName, tableName, rps, dropOptions);
+  }
+
+  public List<Partition> dropPartitionsByNames(String dbName, String tableName,
+      List<String> partitionNames, PartitionDropOptions dropOptions) throws HiveException {
+    RequestPartsSpec partsSpec = new RequestPartsSpec();
+    partsSpec.setNames(partitionNames);
+    return dropPartitions(dbName, tableName, partsSpec, dropOptions);
+  }
+
+  public List<Partition> dropPartitions(String dbName, String tableName,
+      RequestPartsSpec partsSpec, PartitionDropOptions dropOptions) throws HiveException {
     try {
       Table table = getTable(dbName, tableName);
       if (!dropOptions.deleteData) {
@@ -4002,11 +4025,11 @@ public List<Partition> dropPartitions(String dbName, String tableName,
           dropOptions.setWriteId(snapshot.getWriteId());
         }
         long txnId = Optional.ofNullable(SessionState.get())
-          .map(ss -> ss.getTxnMgr().getCurrentTxnId()).orElse(0L);
+            .map(ss -> ss.getTxnMgr().getCurrentTxnId()).orElse(0L);
         dropOptions.setTxnId(txnId);
       }
-      List<Partition> partitions = getMSC().dropPartitions(dbName, tableName,
-          partitionExpressions, dropOptions);
+      List<Partition> partitions = getMSC().dropPartitions(
+          getDefaultCatalog(conf), dbName, tableName, partsSpec, dropOptions);
       return convertFromMetastore(table, partitions);
     } catch (NoSuchObjectException e) {
       throw new HiveException("Partition or table doesn't exist.", e);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index 0e3dfb281b440..5fdf844002bed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -61,6 +61,7 @@
 import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
 import org.apache.hadoop.hive.metastore.api.CreateTableRequest;
 import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.DropPartitionsExpr;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest;
@@ -98,6 +99,7 @@
 import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
 import org.apache.hadoop.hive.metastore.api.PrimaryKeysResponse;
 import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
+import org.apache.hadoop.hive.metastore.api.RequestPartsSpec;
 import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.TableMeta;
 import org.apache.hadoop.hive.metastore.api.TableStatsRequest;
@@ -107,6 +109,7 @@
 import org.apache.hadoop.hive.metastore.api.UniqueConstraintsResponse;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.hadoop.hive.metastore.api.UnknownTableException;
+import org.apache.hadoop.hive.metastore.cache.CachedStore;
 import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder;
 import org.apache.hadoop.hive.metastore.parser.ExpressionTree;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
@@ -1437,26 +1440,38 @@ public boolean dropPartition(String catName, String dbName, String tableName, St
 
   @Override
   public List<Partition> dropPartitions(String catName, String dbName, String tblName,
-      List<Pair<Integer, byte[]>> partExprs, PartitionDropOptions options) throws TException {
+      RequestPartsSpec partsSpec, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException {
     org.apache.hadoop.hive.metastore.api.Table table = getTempTable(dbName, tblName);
     if (table == null) {
-      return super.dropPartitions(catName, dbName, tblName, partExprs, options);
+      return super.dropPartitions(catName, dbName, tblName, partsSpec, options);
     }
     TempTable tt = getPartitionedTempTable(table);
+    List<List<String>> partValues = new ArrayList<>();
+    if (partsSpec.isSetExprs()) {
+      List<DropPartitionsExpr> exprs = partsSpec.getExprs();
+      for (DropPartitionsExpr expr : exprs) {
+        String filter = generateJDOFilter(table, expr.getExpr(), conf.get(HiveConf.ConfVars.DEFAULT_PARTITION_NAME.varname));
+        List<Partition> partitions = tt.listPartitionsByFilter(filter);
+        for (Partition p : partitions) {
+          partValues.add(p.getValues());
+        }
+      }
+    } else if (partsSpec.isSetNames()) {
+      List<String> partNames = partsSpec.getNames();
+      for (String partName : partNames) {
+        partValues.add(CachedStore.partNameToVals(partName));
+      }
+    }
     List<Partition> result = new ArrayList<>();
-    for (Pair<Integer, byte[]> pair : partExprs) {
-      byte[] expr = pair.getRight();
-      String filter = generateJDOFilter(table, expr, conf.get(HiveConf.ConfVars.DEFAULT_PARTITION_NAME.varname));
-      List<Partition> partitions = tt.listPartitionsByFilter(filter);
-      for (Partition p : partitions) {
-        Partition droppedPartition = tt.dropPartition(p.getValues());
-        if (droppedPartition != null) {
-          result.add(droppedPartition);
-          boolean purgeData = options != null ? options.purgeData : true;
-          boolean deleteData = options != null ? options.deleteData : true;
-          if (deleteData && !tt.isExternal()) {
-            deletePartitionLocation(droppedPartition, purgeData);
-          }
+    for (List<String> partValue : partValues) {
+      Partition droppedPartition = tt.dropPartition(partValue);
+      if (droppedPartition != null) {
+        result.add(droppedPartition);
+        boolean purgeData = options != null ? options.purgeData : true;
+        boolean deleteData = options != null ? options.deleteData : true;
+        if (deleteData && !tt.isExternal()) {
+          deletePartitionLocation(droppedPartition, purgeData);
         }
       }
     }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java
index bb7f754fc509e..2ab01e34f3592 100755
--- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java
@@ -700,8 +700,7 @@ public void testDropMissingPartitionsByFilter() throws Throwable {
       hm.createPartition(table, partitionSpec);
     }
 
-    List<Partition> partitions = hm.getPartitions(table);
-    assertEquals(3, partitions.size());
+    assertEquals(3, hm.getPartitions(table).size());
 
     // drop partitions by filter with missing predicate
     try {
@@ -725,6 +724,43 @@ public void testDropMissingPartitionsByFilter() throws Throwable {
     }
   }
 
+  @Test
+  public void testDropPartitionsByNames() throws Throwable {
+    String dbName = Warehouse.DEFAULT_DATABASE_NAME;
+    String tableName = "table_for_testDropPartitionsByNames";
+
+    Table table = createPartitionedTable(dbName, tableName);
+    for (int i = 10; i <= 12; i++) {
+      Map<String, String> partitionSpec = new ImmutableMap.Builder<String, String>()
+          .put("ds", "20231129")
+          .put("hr", String.valueOf(i))
+          .build();
+      hm.createPartition(table, partitionSpec);
+    }
+
+    List<Partition> partitions = hm.getPartitions(table);
+    assertEquals(3, partitions.size());
+
+    List<String> names = Arrays.asList("ds=20231129/hr=10");
+    hm.dropPartitionsByNames(dbName, tableName, names, PartitionDropOptions.instance());
+    assertEquals(2, hm.getPartitions(table).size());
+
+    try {
+      // drop missing partition name
+      names = Arrays.asList("ds=20231129/hr=10", "ds=20231129/hr=11");
+      hm.dropPartitionsByNames(dbName, tableName, names, PartitionDropOptions.instance());
+      fail("Expected exception");
+    } catch (HiveException e) {
+      // expected
+      assertEquals("Some partitions to drop are missing", e.getCause().getMessage());
+      assertEquals(2, hm.getPartitions(table).size());
+    }
+
+    names = Arrays.asList("ds=20231129/hr=12", "ds=20231129/hr=11");
+    hm.dropPartitionsByNames(dbName, tableName, names, PartitionDropOptions.instance());
+    assertEquals(0, hm.getPartitions(table).size());
+  }
+
   /**
    * Test that tables set up with auto-purge skip trash-directory when tables/partitions are dropped.
    * @throws Throwable
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
index 695a3282838c3..e72ecbb68d998 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.RequestPartsSpec;
 import org.apache.hadoop.hive.metastore.api.Table;
 
 import com.google.common.collect.ImmutableList;
@@ -213,4 +214,16 @@ default void preDropPartitions(Table table, EnvironmentContext context,
       List<Pair<Integer, byte[]>> partExprs) throws MetaException {
     // Do nothing
   }
+
+  /**
+   * Called before dropping the partitions from the table in the metastore during ALTER TABLE DROP PARTITION.
+   * @param table table whose partitions need to be dropped
+   * @param context context of the operation
+   * @param partsSpec request partition specification
+   * @throws MetaException
+   */
+  default void preDropPartitions(Table table,
+      EnvironmentContext context, RequestPartsSpec partsSpec) throws MetaException {
+    // Do nothing
+  }
 }
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 9b7cbb82df671..7cddc15905bec 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -1933,24 +1933,32 @@ public List<Partition> dropPartitions(String catName, String dbName, String tblN
       PartitionDropOptions options) throws TException {
     RequestPartsSpec rps = new RequestPartsSpec();
     List<DropPartitionsExpr> exprs = new ArrayList<>(partExprs.size());
+    for (Pair<Integer, byte[]> partExpr : partExprs) {
+      DropPartitionsExpr dpe = new DropPartitionsExpr();
+      dpe.setExpr(partExpr.getRight());
+      dpe.setPartArchiveLevel(partExpr.getLeft());
+      exprs.add(dpe);
+    }
+    rps.setExprs(exprs);
+    return dropPartitions(catName, dbName, tblName, rps, options);
+  }
+
+  @Override
+  public List<Partition> dropPartitions(String catName, String dbName, String tblName,
+      RequestPartsSpec partsSpec, PartitionDropOptions options)
+      throws TException {
+    EnvironmentContext context = new EnvironmentContext();
     Table table = getTable(catName, dbName, tblName);
     HiveMetaHook hook = getHook(table);
-    EnvironmentContext context = new EnvironmentContext();
     if (hook != null) {
-      hook.preDropPartitions(table, context, partExprs);
+      hook.preDropPartitions(table, context, partsSpec);
     }
     if (context.getProperties() != null &&
         Boolean.parseBoolean(context.getProperties().get(SKIP_DROP_PARTITION))) {
       return Lists.newArrayList();
     }
-    for (Pair<Integer, byte[]> partExpr : partExprs) {
-      DropPartitionsExpr dpe = new DropPartitionsExpr();
-      dpe.setExpr(partExpr.getRight());
-      dpe.setPartArchiveLevel(partExpr.getLeft());
-      exprs.add(dpe);
-    }
-    rps.setExprs(exprs);
-    DropPartitionsRequest req = new DropPartitionsRequest(dbName, tblName, rps);
+
+    DropPartitionsRequest req = new DropPartitionsRequest(dbName, tblName, partsSpec);
     req.setCatName(catName);
     req.setDeleteData(options.deleteData);
     req.setNeedResult(options.returnResults);
@@ -1975,6 +1983,22 @@ public List<Partition> dropPartitions(String catName, String dbName, String tblN
     return client.drop_partitions_req(req).getPartitions();
   }
 
+  @Override
+  public List<Partition> dropPartitionsByNames(String dbName, String tblName,
+      List<String> partitionNames, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException {
+    return dropPartitionsByNames(getDefaultCatalog(conf), dbName, tblName, partitionNames, options);
+  }
+
+  @Override
+  public List<Partition> dropPartitionsByNames(String catName, String dbName, String tblName,
+      List<String> partitionNames, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException {
+    RequestPartsSpec rps = new RequestPartsSpec();
+    rps.setNames(partitionNames);
+    return dropPartitions(catName, dbName, tblName, rps, options);
+  }
+
   @Override
   public void dropTable(String dbname, String name, boolean deleteData,
       boolean ignoreUnknownTab) throws MetaException, TException,
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
index d247c91c918b0..92e7f05a21f45 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
@@ -2174,6 +2174,53 @@ List<Partition> dropPartitions(String catName, String dbName, String tblName,
                                  PartitionDropOptions options)
       throws NoSuchObjectException, MetaException, TException;
 
+  /**
+   * Generalization of dropPartitions() that takes a RequestPartsSpec describing the partitions to drop.
+   * @param catName catalog name
+   * @param dbName Name of the database
+   * @param tblName Name of the table
+   * @param partsSpec request partition specification
+   * @param options Boolean options for dropping partitions
+   * @return List of Partitions dropped
+   * @throws NoSuchObjectException No partition matches the expression(s), and ifExists was false.
+   * @throws MetaException error accessing the RDBMS or storage.
+   * @throws TException On failure
+   */
+  List<Partition> dropPartitions(String catName, String dbName, String tblName,
+      RequestPartsSpec partsSpec, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException;
+
+  /**
+   * Drop partitions that match the given list of partition names.
+   * @param dbName Name of the database
+   * @param tblName Name of the table
+   * @param partitionNames List of partition names
+   * @param options Boolean options for dropping partitions
+   * @return List of Partitions dropped
+   * @throws NoSuchObjectException No partition matches the partition name(s), and ifExists was false.
+   * @throws MetaException error accessing the RDBMS or storage.
+   * @throws TException On failure
+   */
+  List<Partition> dropPartitionsByNames(String dbName, String tblName,
+      List<String> partitionNames, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException;
+
+  /**
+   * Generalization of dropPartitionsByNames() that takes an explicit catalog name.
+   * @param catName catalog name
+   * @param dbName Name of the database
+   * @param tblName Name of the table
+   * @param partitionNames List of partition names
+   * @param options Boolean options for dropping partitions
+   * @return List of Partitions dropped
+   * @throws NoSuchObjectException No partition matches the partition name(s), and ifExists was false.
+   * @throws MetaException error accessing the RDBMS or storage.
+   * @throws TException On failure
+   */
+  List<Partition> dropPartitionsByNames(String catName, String dbName, String tblName,
+      List<String> partitionNames, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException;
+
   /**
    * Drop a partition.
    * @param db_name database name.
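For context, a minimal sketch of RequestPartsSpec, the Thrift union the new dropPartitions() overload accepts: it carries either plain partition names or serialized drop expressions, and the patch builds both shapes through the setters shown below. The class name and the exprBytes parameter here are made up for illustration only; this is not part of the patch.

    import java.util.Arrays;
    import java.util.Collections;

    import org.apache.hadoop.hive.metastore.api.DropPartitionsExpr;
    import org.apache.hadoop.hive.metastore.api.RequestPartsSpec;

    public class RequestPartsSpecSketch {
      // Names-based spec: the shape built by the new dropPartitionsByNames() methods.
      static RequestPartsSpec byNames() {
        RequestPartsSpec spec = new RequestPartsSpec();
        spec.setNames(Arrays.asList("ds=20231129/hr=10", "ds=20231129/hr=11"));
        return spec;
      }

      // Expression-based spec: the shape built by the pre-existing Pair<Integer, byte[]> overloads.
      // exprBytes stands in for a serialized partition predicate produced by the planner.
      static RequestPartsSpec byExprs(byte[] exprBytes) {
        DropPartitionsExpr dpe = new DropPartitionsExpr();
        dpe.setExpr(exprBytes);
        dpe.setPartArchiveLevel(0);
        RequestPartsSpec spec = new RequestPartsSpec();
        spec.setExprs(Collections.singletonList(dpe));
        return spec;
      }
    }

Both shapes end up in the same DropPartitionsRequest and drop_partitions_req() call, as the HiveMetaStoreClient change above shows; the names-based path simply skips the expression-to-filter translation.
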
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java
index 96695ee1eaaf2..8ca1c96d58078 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java
@@ -3625,6 +3625,37 @@ public List<Partition> dropPartitions(String catName, String dbName, String tblN
     throw new UnsupportedOperationException();
   }
 
+  @Override
+  public List<Partition> dropPartitions(String catName, String dbName, String tblName,
+      RequestPartsSpec partsSpec, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public List<Partition> dropPartitionsByNames(String dbName, String tblName,
+      List<String> partitionNames, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException {
+    RequestPartsSpec rps = new RequestPartsSpec();
+    rps.setNames(partitionNames);
+    DropPartitionsRequest req = new DropPartitionsRequest(dbName, tblName, rps);
+    req.setDeleteData(options.deleteData);
+    req.setNeedResult(options.returnResults);
+    req.setIfExists(options.ifExists);
+    if (options.purgeData) {
+      LOG.info("Dropped partitions will be purged!");
+      req.setEnvironmentContext(getEnvironmentContextWithIfPurgeSet());
+    }
+    return client.drop_partitions_req(req).getPartitions();
+  }
+
+  @Override
+  public List<Partition> dropPartitionsByNames(String catName, String dbName, String tblName,
+      List<String> partitionNames, PartitionDropOptions options)
+      throws NoSuchObjectException, MetaException, TException {
+    throw new UnsupportedOperationException();
+  }
+
   @Override
   public boolean dropPartition(String catName, String db_name, String tbl_name, String name,
       boolean deleteData) throws NoSuchObjectException, MetaException,
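Usage sketch (illustrative only, not part of the patch): how a caller might exercise the new names-based drop once this change is in place. The metastore connection, the default.web_logs table, and its ds/hr partition values are hypothetical, and the options are set through the usual PartitionDropOptions fluent setters; the same operation is available at the ql layer through the new Hive.dropPartitionsByNames(dbName, tableName, names, options).

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.hive.metastore.PartitionDropOptions;
    import org.apache.hadoop.hive.metastore.api.Partition;

    public class DropPartitionsByNamesExample {
      public static void main(String[] args) throws Exception {
        // Connects to the metastore configured via hive-site.xml / HiveConf defaults.
        IMetaStoreClient msc = new HiveMetaStoreClient(new HiveConf());
        try {
          // Partition names use the usual key1=val1/key2=val2 encoding.
          List<String> names = Arrays.asList("ds=20231129/hr=10", "ds=20231129/hr=11");
          PartitionDropOptions options = PartitionDropOptions.instance()
              .deleteData(true)      // also remove the partition data from the filesystem
              .ifExists(true)        // tolerate names that no longer resolve to a partition
              .returnResults(true);  // have the metastore return the dropped Partition objects
          List<Partition> dropped =
              msc.dropPartitionsByNames("default", "web_logs", names, options);
          System.out.println("Dropped " + dropped.size() + " partitions");
        } finally {
          msc.close();
        }
      }
    }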