Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support to schedule partitions that already exist in table #189

Merged
merged 7 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

package com.expediagroup.beekeeper.api.error;

import java.time.LocalDateTime;

import javax.servlet.http.HttpServletRequest;

import org.springframework.data.mapping.PropertyReferenceException;
Expand All @@ -24,8 +26,6 @@
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.RestControllerAdvice;

import java.time.LocalDateTime;

@RestControllerAdvice
public class BeekeeperExceptionHandler {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (C) 2019-2023 Expedia, Inc.
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,24 +15,22 @@
*/
package com.expediagroup.beekeeper.api.error;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.Collections;
import java.util.List;

import org.junit.jupiter.api.Test;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockHttpServletRequest;
import org.springframework.data.mapping.PropertyReferenceException;
import org.springframework.data.mapping.PropertyPath;
import org.springframework.data.mapping.PropertyReferenceException;
import org.springframework.data.util.ClassTypeInformation;
import org.springframework.data.util.TypeInformation;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockHttpServletRequest;

import static org.assertj.core.api.Assertions.assertThat;

import com.expediagroup.beekeeper.api.error.BeekeeperExceptionHandler;
import com.expediagroup.beekeeper.api.error.ErrorResponse;
import com.expediagroup.beekeeper.core.model.HousekeepingPath;

import java.util.Collections;
import java.util.List;

public class BeekeeperExceptionHandlerTest {

private final BeekeeperExceptionHandler exceptionHandler = new BeekeeperExceptionHandler();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (C) 2019-2023 Expedia, Inc.
* Copyright (C) 2019-2025 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -145,4 +145,20 @@ void deleteScheduledOrFailedPartitionRecordsForTable(
@Query(value = "delete from HousekeepingMetadata t where t.cleanupTimestamp < :instant "
+ "and (t.housekeepingStatus = 'DELETED' or t.housekeepingStatus = 'DISABLED')")
void cleanUpOldDeletedRecords(@Param("instant") LocalDateTime instant);

/**
 * Finds all partition-level records for the given table whose housekeeping status is
 * SCHEDULED or FAILED.
 *
 * <p>Table-level entries are excluded by the {@code partitionName IS NOT NULL} predicate,
 * so only partition records are returned.
 *
 * @param databaseName name of the database containing the table
 * @param tableName name of the table whose partition records should be looked up
 * @return list of matching records; empty if the table has no scheduled or failed partitions
 */
@Query(value = "from HousekeepingMetadata t "
+ "where t.databaseName = :databaseName "
+ "and t.tableName = :tableName "
+ "and t.partitionName IS NOT NULL "
+ "and (t.housekeepingStatus = 'SCHEDULED' or t.housekeepingStatus = 'FAILED')")
List<HousekeepingMetadata> findRecordsForCleanupByDbAndTableName(
@Param("databaseName") String databaseName,
@Param("tableName") String tableName);
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (C) 2019-2024 Expedia, Inc.
* Copyright (C) 2019-2025 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,8 +15,12 @@
*/
package com.expediagroup.beekeeper.integration;

import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY;
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY;
import static org.assertj.core.api.Assertions.assertThat;
import static org.awaitility.Awaitility.await;
import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3;
import static org.testcontainers.containers.localstack.LocalStackContainer.Service.SQS;

import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SCHEDULED;
Expand All @@ -33,9 +37,12 @@
import java.net.URISyntaxException;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
Expand All @@ -45,6 +52,7 @@
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.testcontainers.containers.localstack.LocalStackContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
Expand All @@ -53,9 +61,12 @@
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.composite.CompositeMeterRegistry;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.CreateBucketRequest;
import com.amazonaws.services.sqs.AmazonSQS;
import com.amazonaws.services.sqs.model.PurgeQueueRequest;
import com.amazonaws.services.sqs.model.SendMessageRequest;
import com.google.common.collect.ImmutableMap;

import com.expediagroup.beekeeper.core.model.HousekeepingMetadata;
import com.expediagroup.beekeeper.core.model.PeriodDuration;
Expand All @@ -65,8 +76,11 @@
import com.expediagroup.beekeeper.integration.model.AlterTableSqsMessage;
import com.expediagroup.beekeeper.integration.model.CreateTableSqsMessage;
import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils;
import com.expediagroup.beekeeper.integration.utils.HiveTestUtils;
import com.expediagroup.beekeeper.scheduler.apiary.BeekeeperSchedulerApiary;

import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension;

@Testcontainers
public class BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase {

Expand All @@ -75,6 +89,7 @@ public class BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest extends Beek

protected static final String QUEUE = "apiary-receiver-queue";
protected static final String SCHEDULED_EXPIRED_METRIC = "metadata-scheduled";
protected static final String METASTORE_URI_PROPERTY = "properties.metastore-uri";
protected static final String HEALTHCHECK_URI = "http://localhost:8080/actuator/health";
protected static final String PROMETHEUS_URI = "http://localhost:8080/actuator/prometheus";

Expand All @@ -85,10 +100,41 @@ public class BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest extends Beek
protected static final String PARTITION_B_NAME = "event_date=2020-01-01/event_hour=1";
protected static final String LOCATION_A = "s3://bucket/table1/partition";
protected static final String LOCATION_B = "s3://bucket/table2/partition";
protected static final String BUCKET = "test-path-bucket";
protected static final String PARTITIONED_TABLE_NAME = TABLE_NAME_VALUE + "_partitioned";
protected static final String ROOT_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/";
protected static final String PARTITIONED_TABLE_PATH = ROOT_PATH + PARTITIONED_TABLE_NAME;
protected static final String PARTITION_ROOT_PATH = ROOT_PATH + "some_location/";
protected static final List<String> PARTITION_VALUES = List.of("2020-01-01", "0", "A");

protected static final String S3_ACCESS_KEY = "access";
protected static final String S3_SECRET_KEY = "secret";

@Container
protected static final LocalStackContainer SQS_CONTAINER = ContainerTestUtils.awsContainer(SQS);
@Container
protected static final LocalStackContainer S3_CONTAINER = ContainerTestUtils.awsContainer(S3);

protected static AmazonSQS amazonSQS;
protected HiveTestUtils hiveTestUtils;
protected HiveMetaStoreClient metastoreClient;

static {
S3_CONTAINER.start();
}

protected static AmazonS3 amazonS3;

private static Map<String, String> metastoreProperties = ImmutableMap
.<String, String>builder()
.put(ENDPOINT, ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3))
.put(ACCESS_KEY, S3_ACCESS_KEY)
.put(SECRET_KEY, S3_SECRET_KEY)
.build();

@RegisterExtension
protected ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension(
DATABASE_NAME_VALUE, metastoreProperties);

@BeforeAll
public static void init() {
Expand All @@ -97,18 +143,29 @@ public static void init() {

amazonSQS = ContainerTestUtils.sqsClient(SQS_CONTAINER, AWS_REGION);
amazonSQS.createQueue(QUEUE);

amazonS3 = ContainerTestUtils.s3Client(S3_CONTAINER, AWS_REGION);
amazonS3.createBucket(new CreateBucketRequest(BUCKET, AWS_REGION));
}

@AfterAll
public static void teardown() {
System.clearProperty(APIARY_QUEUE_URL_PROPERTY);
System.clearProperty(METASTORE_URI_PROPERTY);

amazonSQS.shutdown();
amazonS3.shutdown();
}

@BeforeEach
public void setup() {
System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri());
metastoreClient = thriftHiveMetaStore.client();
hiveTestUtils = new HiveTestUtils(metastoreClient);

amazonSQS.purgeQueue(new PurgeQueueRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE)));
amazonS3.listObjectsV2(BUCKET).getObjectSummaries()
.forEach(object -> amazonS3.deleteObject(BUCKET, object.getKey()));
executorService.execute(() -> BeekeeperSchedulerApiary.main(new String[] {}));
await().atMost(Duration.ONE_MINUTE).until(BeekeeperSchedulerApiary::isRunning);
}
Expand Down Expand Up @@ -156,7 +213,7 @@ public void expiredMetadataAddPartitionEvent() throws SQLException, IOException,

List<HousekeepingMetadata> expiredMetadata = getExpiredMetadata();
// check first entry is for the table
assertThat(expiredMetadata.get(0).getPartitionName()).isEqualTo(null);
assertThat(expiredMetadata.get(0).getPartitionName()).isNull();
assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME);
}

Expand All @@ -176,7 +233,7 @@ public void expiredMetadataMultipleAddPartitionEvents() throws SQLException, IOE

List<HousekeepingMetadata> expiredMetadata = getExpiredMetadata();
// check first entry is for the table
assertThat(expiredMetadata.get(0).getPartitionName()).isEqualTo(null);
assertThat(expiredMetadata.get(0).getPartitionName()).isNull();
assertExpiredMetadata(expiredMetadata.get(1), LOCATION_A, PARTITION_A_NAME);
assertExpiredMetadata(expiredMetadata.get(2), LOCATION_B, PARTITION_B_NAME);
}
Expand Down Expand Up @@ -222,7 +279,7 @@ public void expiredMetadataCreateIcebergTableEvent() throws SQLException, IOExce
await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 0);

List<HousekeepingMetadata> expiredMetadata = getExpiredMetadata();
assertThat(expiredMetadata.size()).isEqualTo(0);
assertThat(expiredMetadata).isEmpty();
}

@Test
Expand All @@ -240,6 +297,43 @@ public void testEventAddedToHistoryTable() throws SQLException, IOException, URI
assertThat(history.getHousekeepingStatus()).isEqualTo(SCHEDULED.name());
}

@Test
void scheduleExistingPartitionsWhenPropertiesExpireInTable() throws Exception {
  // Seed the metastore with a partitioned table that already has two partitions
  // before any Beekeeper event arrives.
  Table partitionedTable = hiveTestUtils.createTable(PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true, true);
  hiveTestUtils.addPartitionsToTable(PARTITION_ROOT_PATH, partitionedTable, PARTITION_VALUES);
  hiveTestUtils.addPartitionsToTable(PARTITION_ROOT_PATH, partitionedTable, List.of("2020-01-01", "1", "B"));

  // An alter-table event with expiration enabled should trigger scheduling of the
  // table itself plus both pre-existing partitions.
  AlterTableSqsMessage message = new AlterTableSqsMessage(LOCATION_A, true);
  amazonSQS.sendMessage(sendMessageRequest(message.getFormattedString()));

  await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 3);

  assertThat(metastoreClient.tableExists(DATABASE_NAME_VALUE, TABLE_NAME_VALUE)).isTrue();
  List<HousekeepingMetadata> scheduledRecords = getExpiredMetadata();
  assertThat(scheduledRecords).hasSize(3);
}

@Test
void scheduleMissingPartitionsWhenPropertiesExpireInTable() throws Exception {
  // Seed the metastore with a partitioned table and two partitions.
  Table partitionedTable = hiveTestUtils.createTable(PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true, true);
  hiveTestUtils.addPartitionsToTable(PARTITION_ROOT_PATH, partitionedTable, PARTITION_VALUES);
  hiveTestUtils.addPartitionsToTable(PARTITION_ROOT_PATH, partitionedTable, List.of("2020-01-01", "1", "B"));

  // Pre-insert housekeeping rows for the table itself and for one extra partition,
  // so only the remaining metastore partitions are missing from the housekeeping table.
  insertExpiredMetadata(PARTITION_ROOT_PATH, null);
  insertExpiredMetadata(PARTITION_ROOT_PATH + PARTITION_A_NAME + "/event_type=C", PARTITION_A_NAME + "/event_type=C");

  AlterTableSqsMessage message = new AlterTableSqsMessage(PARTITION_ROOT_PATH, true);
  amazonSQS.sendMessage(sendMessageRequest(message.getFormattedString()));

  // Expect the two pre-inserted rows plus the two metastore partitions to be scheduled.
  await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 4);

  assertThat(metastoreClient.tableExists(DATABASE_NAME_VALUE, TABLE_NAME_VALUE)).isTrue();
  List<HousekeepingMetadata> scheduledRecords = getExpiredMetadata();
  assertThat(scheduledRecords).hasSize(4);
}

@Test
public void healthCheck() {
CloseableHttpClient client = HttpClientBuilder.create().build();
Expand All @@ -253,7 +347,7 @@ public void healthCheck() {
public void prometheus() {
CloseableHttpClient client = HttpClientBuilder.create().build();
HttpGet request = new HttpGet(PROMETHEUS_URI);
await().atMost(30, TimeUnit.SECONDS).until(() -> client.execute(request).getStatusLine().getStatusCode() == 200);
await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> client.execute(request).getStatusLine().getStatusCode() == 200);
}

protected SendMessageRequest sendMessageRequest(String payload) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/
package com.expediagroup.beekeeper.integration;

import static java.util.Collections.emptyMap;

import static org.assertj.core.api.Assertions.assertThat;
import static org.awaitility.Awaitility.await;
import static org.testcontainers.containers.localstack.LocalStackContainer.Service.SQS;
Expand Down Expand Up @@ -45,6 +47,7 @@
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.testcontainers.containers.localstack.LocalStackContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
Expand All @@ -67,11 +70,14 @@
import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils;
import com.expediagroup.beekeeper.scheduler.apiary.BeekeeperSchedulerApiary;

import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension;

@Testcontainers
public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase {

protected static final int TIMEOUT = 5;
protected static final String APIARY_QUEUE_URL_PROPERTY = "properties.apiary.queue-url";
protected static final String METASTORE_URI_PROPERTY = "properties.metastore-uri";

protected static final String QUEUE = "apiary-receiver-queue";
protected static final String SCHEDULED_ORPHANED_METRIC = "paths-scheduled";
Expand All @@ -82,6 +88,10 @@ public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends Bee
protected static final LocalStackContainer SQS_CONTAINER = ContainerTestUtils.awsContainer(SQS);
protected static AmazonSQS amazonSQS;

@RegisterExtension
protected ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension(
DATABASE_NAME_VALUE, emptyMap());

@BeforeAll
public static void init() {
String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE);
Expand All @@ -100,6 +110,8 @@ public static void teardown() {

@BeforeEach
public void setup() {
System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri());

amazonSQS.purgeQueue(new PurgeQueueRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE)));
executorService.execute(() -> BeekeeperSchedulerApiary.main(new String[] {}));
await().atMost(Duration.ONE_MINUTE).until(BeekeeperSchedulerApiary::isRunning);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (C) 2019-2023 Expedia, Inc.
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,7 +20,6 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.springframework.http.HttpStatus.OK;

import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SCHEDULED;
import static com.expediagroup.beekeeper.integration.CommonTestVariables.CLEANUP_ATTEMPTS_VALUE;
import static com.expediagroup.beekeeper.integration.CommonTestVariables.CLIENT_ID_FIELD;
import static com.expediagroup.beekeeper.integration.CommonTestVariables.CREATION_TIMESTAMP_VALUE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
import com.expediagroup.beekeeper.cleanup.hive.HiveClientFactory;
import com.expediagroup.beekeeper.cleanup.metadata.MetadataCleaner;
import com.expediagroup.beekeeper.cleanup.path.PathCleaner;
import com.expediagroup.beekeeper.core.error.BeekeeperException;
import com.expediagroup.beekeeper.core.model.HousekeepingMetadata;
import com.expediagroup.beekeeper.core.model.HousekeepingPath;
import com.expediagroup.beekeeper.core.model.HousekeepingStatus;
Expand Down
Loading