Skip to content

Commit aa4382d

Browse files
Fix #3167: avoid infinite wait on failure to init curl handle (#3200)
1 parent 09da88b commit aa4382d

File tree

3 files changed

+157
-1
lines changed

3 files changed

+157
-1
lines changed

src/aws-cpp-sdk-core/include/aws/core/utils/ResourceManager.h

+29
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,34 @@ namespace Aws
7474
return resource;
7575
}
7676

77+
/**
78+
* Returns a resource with exclusive ownership or a nullptr.
79+
* If resource is available within the wait timeout then the resource is returned.
80+
* otherwise (if the timeout has expired or container is shutdown) the nullptr is returned.
81+
* You must call Release on the resource when you are finished.
82+
* This method is enabled only for pointer RESOURCE_TYPE type.
83+
*
84+
* @return instance of RESOURCE_TYPE, nullptr if the resource manager is being shutdown
85+
*/
86+
template <typename std::enable_if<std::is_pointer<RESOURCE_TYPE>::value>::type* = nullptr>
87+
RESOURCE_TYPE TryAcquire(const uint64_t timeoutMs) {
88+
std::unique_lock<std::mutex> locker(m_queueLock);
89+
bool hasResource = m_shutdown.load() || !m_resources.empty();
90+
if (!hasResource) {
91+
hasResource = m_semaphore.wait_for(locker, std::chrono::milliseconds(timeoutMs),
92+
[&]() { return m_shutdown.load() || !m_resources.empty(); });
93+
}
94+
95+
if (m_shutdown || !hasResource) {
96+
return nullptr;
97+
}
98+
99+
RESOURCE_TYPE resource = m_resources.back();
100+
m_resources.pop_back();
101+
102+
return resource;
103+
}
104+
77105
/**
78106
* Returns whether or not resources are currently available for acquisition
79107
*
@@ -122,6 +150,7 @@ namespace Aws
122150
{
123151
std::unique_lock<std::mutex> locker(m_queueLock);
124152
m_shutdown = true;
153+
m_semaphore.notify_all();
125154

126155
//wait for all acquired resources to be released.
127156
while (m_resources.size() < resourceCount)

src/aws-cpp-sdk-core/source/http/curl/CurlHandleContainer.cpp

+21-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ CurlHandleContainer::~CurlHandleContainer()
3232
AWS_LOGSTREAM_DEBUG(CURL_HANDLE_CONTAINER_TAG, "Cleaning up " << handle);
3333
curl_easy_cleanup(handle);
3434
}
35+
m_poolSize = 0;
3536
}
3637

3738
CURL* CurlHandleContainer::AcquireCurlHandle()
@@ -44,7 +45,20 @@ CURL* CurlHandleContainer::AcquireCurlHandle()
4445
CheckAndGrowPool();
4546
}
4647

47-
CURL* handle = m_handleContainer.TryAcquire();
48+
// TODO: 1.12: start to fail instead of infinite loop, possibly introduce another timeout config field
49+
CURL* handle = nullptr;
50+
bool errorLogged = false; // avoid log explosion on legacy app behavior
51+
while (!handle) {
52+
constexpr unsigned long ACQUIRE_TIMEOUT = 1000l; // some big enough arbitrary value, possibly need a user config or just fail ASAP.
53+
handle = m_handleContainer.TryAcquire(ACQUIRE_TIMEOUT);
54+
if (!handle && !errorLogged) {
55+
AWS_LOGSTREAM_ERROR(CURL_HANDLE_CONTAINER_TAG,
56+
"Unable to Acquire a curl handle within 1 second. "
57+
"Waiting further, this method will start failing in 1.12.x. "
58+
"Please increase the pool size.");
59+
errorLogged = true;
60+
}
61+
}
4862
AWS_LOGSTREAM_DEBUG(CURL_HANDLE_CONTAINER_TAG, "Connection has been released. Continuing.");
4963
AWS_LOGSTREAM_DEBUG(CURL_HANDLE_CONTAINER_TAG, "Returning connection handle " << handle);
5064
return handle;
@@ -80,10 +94,16 @@ void CurlHandleContainer::DestroyCurlHandle(CURL* handle)
8094
// If the handle is not released back to the pool, it could create a deadlock
8195
// Create a new handle and release that into the pool
8296
handle = CreateCurlHandleInPool();
97+
if (!handle && m_poolSize) {
98+
m_poolSize -= 1;
99+
}
83100
}
84101
if (handle)
85102
{
86103
AWS_LOGSTREAM_DEBUG(CURL_HANDLE_CONTAINER_TAG, "Created replacement handle and released to pool: " << handle);
104+
} else {
105+
AWS_LOGSTREAM_ERROR(CURL_HANDLE_CONTAINER_TAG,
106+
"Failed to create a replacement handle. The handle pool size reduced to " << m_poolSize);
87107
}
88108
}
89109

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/**
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0.
4+
*/
5+
6+
#include <aws/core/utils/ResourceManager.h>
7+
#include <aws/testing/AwsCppSdkGTestSuite.h>
8+
9+
#include <future>
10+
#include <random>
11+
12+
using namespace Aws::Utils;
13+
14+
class ExclusiveOwnershipResourceManagerTest : public Aws::Testing::AwsCppSdkGTestSuite {};
15+
16+
TEST_F(ExclusiveOwnershipResourceManagerTest, TestAcquire) {
17+
class TestResource {
18+
public:
19+
TestResource(int startVal) : m_value(startVal) {}
20+
21+
int GetValue() { return m_value; }
22+
23+
void DoSomething() {
24+
std::unique_lock<std::mutex> lock(m_lock, std::defer_lock);
25+
26+
if (!lock.try_lock()) {
27+
m_fail = true; // concurrent access detected
28+
}
29+
m_value += 1;
30+
}
31+
32+
bool Fail() { return m_fail; }
33+
34+
protected:
35+
int m_value = 0;
36+
std::mutex m_lock;
37+
bool m_fail = false;
38+
};
39+
40+
ExclusiveOwnershipResourceManager<TestResource*> resourceManagerInTest;
41+
42+
ASSERT_FALSE(resourceManagerInTest.HasResourcesAvailable());
43+
ASSERT_EQ(nullptr, resourceManagerInTest.TryAcquire(1l));
44+
45+
static const size_t RESOURCES = 6;
46+
TestResource* resources[RESOURCES] = {};
47+
for (int i = 0; i < (int)RESOURCES; ++i) {
48+
resources[i] = new TestResource(i);
49+
resourceManagerInTest.Release(resources[i]);
50+
}
51+
52+
static const size_t THREADS = 8;
53+
Aws::Vector<std::future<bool>> futures(THREADS);
54+
55+
auto useResourceFn = [](ExclusiveOwnershipResourceManager<TestResource*>& resourceManager) -> bool {
56+
std::random_device rd;
57+
std::mt19937_64 randGen(rd());
58+
std::uniform_int_distribution<int64_t> randDist(0, 100);
59+
60+
for (size_t j = 0; j < 50; ++j) {
61+
TestResource* resource = nullptr;
62+
while (!resource) {
63+
resource = resourceManager.TryAcquire(randDist(randGen));
64+
}
65+
// "do something" for rand ms
66+
int64_t sleepFor = randDist(randGen);
67+
std::this_thread::sleep_for(std::chrono::milliseconds(sleepFor));
68+
const int before = resource->GetValue();
69+
resource->DoSomething();
70+
EXPECT_NE(before, resource->GetValue());
71+
resourceManager.Release(resource);
72+
}
73+
return true;
74+
};
75+
76+
for (size_t i = 0; i < THREADS; ++i) {
77+
futures[i] = std::async([&resourceManagerInTest, &useResourceFn]() -> bool { return useResourceFn(resourceManagerInTest); });
78+
}
79+
80+
for (size_t i = 0; i < THREADS; ++i) {
81+
bool batchResult = futures[i].get();
82+
ASSERT_TRUE(batchResult);
83+
}
84+
85+
// single thread test
86+
Aws::Vector<TestResource*> inUse;
87+
for (int i = 0; i < (int)RESOURCES; ++i) {
88+
TestResource* resource = resourceManagerInTest.TryAcquire(0l);
89+
ASSERT_TRUE(resource);
90+
const int before = resource->GetValue();
91+
resource->DoSomething();
92+
EXPECT_NE(before, resource->GetValue());
93+
inUse.push_back(resource);
94+
}
95+
for (auto resource : inUse) {
96+
resourceManagerInTest.Release(resource);
97+
}
98+
99+
Aws::Vector<TestResource*> released = resourceManagerInTest.ShutdownAndWait(RESOURCES);
100+
ASSERT_EQ(RESOURCES, released.size());
101+
102+
for (int i = 0; i < (int)RESOURCES; ++i) {
103+
ASSERT_FALSE(resources[i]->Fail());
104+
ASSERT_NE(i, resources[i]->GetValue());
105+
delete resources[i];
106+
}
107+
}

0 commit comments

Comments
 (0)