Skip to content

Commit 38c4fce

Browse files
committed
[Concurrency] Fix a race when using cancelAll on a task group concurrently with child tasks being removed.
_swift_taskGroup_cancelAllChildren relies on there being no concurrent modification when called from the owning task, but this is not guaranteed. Rearrange things to always take the owning task's status record lock when walking the group's children. Split _swift_taskGroup_cancelAllChildren into two functions, one which assumes/requires the lock is already held, and one which acquires the lock. We don't have the owning task in this case, but we can either get it from the current task, or by looking at the parent of the child task we're working on. rdar://147172991
1 parent 3a4549b commit 38c4fce

File tree

4 files changed

+66
-36
lines changed

4 files changed

+66
-36
lines changed

include/swift/ABI/TaskStatus.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,13 @@ class ChildTaskStatusRecord : public TaskStatusRecord {
133133
/// Group child tasks DO NOT have their own `ChildTaskStatusRecord` entries,
134134
/// and are only tracked by their respective `TaskGroupTaskStatusRecord`.
135135
class TaskGroupTaskStatusRecord : public TaskStatusRecord {
136-
public:
137-
AsyncTask *FirstChild;
136+
// FirstChild may be read concurrently to check for the presence of children,
137+
// so it needs to be atomic. The pointer is never dereferenced in that case,
138+
// so we can universally use memory_order_relaxed on it.
139+
std::atomic<AsyncTask *> FirstChild;
138140
AsyncTask *LastChild;
139141

142+
public:
140143
TaskGroupTaskStatusRecord()
141144
: TaskStatusRecord(TaskStatusRecordKind::TaskGroup),
142145
FirstChild(nullptr),
@@ -155,7 +158,9 @@ class TaskGroupTaskStatusRecord : public TaskStatusRecord {
155158
/// Return the first child linked by this record. This may be null;
156159
/// if not, it (and all of its successors) are guaranteed to satisfy
157160
/// `isChildTask()`.
158-
AsyncTask *getFirstChild() const { return FirstChild; }
161+
AsyncTask *getFirstChild() const {
162+
return FirstChild.load(std::memory_order_relaxed);
163+
}
159164

160165
/// Attach the passed in `child` task to this group.
161166
void attachChild(AsyncTask *child) {
@@ -165,9 +170,9 @@ class TaskGroupTaskStatusRecord : public TaskStatusRecord {
165170
auto oldLastChild = LastChild;
166171
LastChild = child;
167172

168-
if (!FirstChild) {
173+
if (!getFirstChild()) {
169174
// This is the first child we ever attach, so store it as FirstChild.
170-
FirstChild = child;
175+
FirstChild.store(child, std::memory_order_relaxed);
171176
return;
172177
}
173178

@@ -176,15 +181,18 @@ class TaskGroupTaskStatusRecord : public TaskStatusRecord {
176181

177182
void detachChild(AsyncTask *child) {
178183
assert(child && "cannot remove a null child from group");
179-
if (FirstChild == child) {
180-
FirstChild = getNextChildTask(child);
181-
if (FirstChild == nullptr) {
184+
185+
AsyncTask *prev = getFirstChild();
186+
187+
if (prev == child) {
188+
AsyncTask *next = getNextChildTask(child);
189+
FirstChild.store(next, std::memory_order_relaxed);
190+
if (next == nullptr) {
182191
LastChild = nullptr;
183192
}
184193
return;
185194
}
186195

187-
AsyncTask *prev = FirstChild;
188196
// Remove the child from the linked list, i.e.:
189197
// prev -> afterPrev -> afterChild
190198
// ==

stdlib/public/Concurrency/TaskGroup.cpp

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -474,9 +474,8 @@ class TaskGroupBase : public TaskGroupTaskStatusRecord {
474474
bool statusCancel();
475475

476476
/// Cancel the group and all of its child tasks recursively.
477-
/// This also sets
478-
bool cancelAll();
479-
477+
/// This also sets the cancelled bit in the group status.
478+
bool cancelAll(AsyncTask *task);
480479
};
481480

482481
#if !SWIFT_CONCURRENCY_EMBEDDED
@@ -1378,7 +1377,8 @@ void DiscardingTaskGroup::offer(AsyncTask *completedTask, AsyncContext *context)
13781377
// Discarding results mode immediately treats a child failure as group cancellation.
13791378
// "All for one, one for all!" - any task failing must cause the group and all sibling tasks to be cancelled,
13801379
// such that the discarding group can exit as soon as possible.
1381-
cancelAll();
1380+
auto parent = completedTask->childFragment()->getParent();
1381+
cancelAll(parent);
13821382

13831383
if (afterComplete.hasWaitingTask() && afterComplete.pendingTasks(this) == 0) {
13841384
// We grab the waiting task while holding the group lock, because this
@@ -2097,10 +2097,12 @@ static bool swift_taskGroup_isCancelledImpl(TaskGroup *group) {
20972097

20982098
SWIFT_CC(swift)
20992099
static void swift_taskGroup_cancelAllImpl(TaskGroup *group) {
2100-
asBaseImpl(group)->cancelAll();
2100+
// TaskGroup is not a Sendable type, so this can only be called from the
2101+
// owning task.
2102+
asBaseImpl(group)->cancelAll(swift_task_getCurrent());
21012103
}
21022104

2103-
bool TaskGroupBase::cancelAll() {
2105+
bool TaskGroupBase::cancelAll(AsyncTask *owningTask) {
21042106
SWIFT_TASK_DEBUG_LOG("cancel all tasks in group = %p", this);
21052107

21062108
// Flag the task group itself as cancelled. If this was already
@@ -2114,8 +2116,8 @@ bool TaskGroupBase::cancelAll() {
21142116

21152117
// Cancel all the child tasks. TaskGroup is not a Sendable type,
21162118
// so cancelAll() can only be called from the owning task. This
2117-
// satisfies the precondition on cancelAllChildren().
2118-
_swift_taskGroup_cancelAllChildren(asAbstract(this));
2119+
// satisfies the precondition on cancelAllChildren_unlocked().
2120+
_swift_taskGroup_cancelAllChildren_unlocked(asAbstract(this), owningTask);
21192121

21202122
return true;
21212123
}
@@ -2124,22 +2126,8 @@ SWIFT_CC(swift)
21242126
static void swift_task_cancel_group_child_tasksImpl(TaskGroup *group) {
21252127
// TaskGroup is not a Sendable type, and so this operation (which is not
21262128
// currently exposed in the API) can only be called from the owning
2127-
// task. This satisfies the precondition on cancelAllChildren().
2128-
_swift_taskGroup_cancelAllChildren(group);
2129-
}
2130-
2131-
/// Cancel all the children of the given task group.
2132-
///
2133-
/// The caller must guarantee that this is either called from the
2134-
/// owning task of the task group or while holding the owning task's
2135-
/// status record lock.
2136-
void swift::_swift_taskGroup_cancelAllChildren(TaskGroup *group) {
2137-
// Because only the owning task of the task group can modify the
2138-
// child list of a task group status record, and it can only do so
2139-
// while holding the owning task's status record lock, we do not need
2140-
// any additional synchronization within this function.
2141-
for (auto childTask: group->getTaskRecord()->children())
2142-
swift_task_cancel(childTask);
2129+
// task. This satisfies the precondition on cancelAllChildren_unlocked().
2130+
_swift_taskGroup_cancelAllChildren_unlocked(group, swift_task_getCurrent());
21432131
}
21442132

21452133
// =============================================================================

stdlib/public/Concurrency/TaskPrivate.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,16 @@ AsyncTask *_swift_task_setCurrent(AsyncTask *newTask);
9090

9191
/// Cancel all the child tasks that belong to `group`.
9292
///
93-
/// The caller must guarantee that this is either called from the
94-
/// owning task of the task group or while holding the owning task's
95-
/// status record lock.
93+
/// The caller must guarantee that this is called while holding the owning
94+
/// task's status record lock.
9695
void _swift_taskGroup_cancelAllChildren(TaskGroup *group);
9796

97+
/// Cancel all the child tasks that belong to `group`.
98+
///
99+
/// The caller must guarantee that this is called from the owning task.
100+
void _swift_taskGroup_cancelAllChildren_unlocked(TaskGroup *group,
101+
AsyncTask *owningTask);
102+
98103
/// Remove the given task from the given task group.
99104
///
100105
/// This is an internal API; clients outside of the TaskGroup implementation

stdlib/public/Concurrency/TaskStatus.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,35 @@ void swift::_swift_taskGroup_detachChild(TaskGroup *group,
943943
});
944944
}
945945

946+
/// Cancel all the child tasks that belong to `group`.
947+
///
948+
/// The caller must guarantee that this is called while holding the owning
949+
/// task's status record lock.
950+
void swift::_swift_taskGroup_cancelAllChildren(TaskGroup *group) {
951+
// Because only the owning task of the task group can modify the
952+
// child list of a task group status record, and it can only do so
953+
// while holding the owning task's status record lock, we do not need
954+
// any additional synchronization within this function.
955+
for (auto childTask : group->getTaskRecord()->children())
956+
swift_task_cancel(childTask);
957+
}
958+
959+
/// Cancel all the child tasks that belong to `group`.
960+
///
961+
/// The caller must guarantee that this is called from the owning task.
962+
void swift::_swift_taskGroup_cancelAllChildren_unlocked(TaskGroup *group,
963+
AsyncTask *owningTask) {
964+
// Early out. If there are no children, there's nothing to do. We can safely
965+
// check this without locking, since this can only be concurrently mutated
966+
// from a child task. If there are no children then no more can be added.
967+
if (!group->getTaskRecord()->getFirstChild())
968+
return;
969+
970+
withStatusRecordLock(owningTask, [&group](ActiveTaskStatus status) {
971+
_swift_taskGroup_cancelAllChildren(group);
972+
});
973+
}
974+
946975
/**************************************************************************/
947976
/****************************** CANCELLATION ******************************/
948977
/**************************************************************************/

0 commit comments

Comments
 (0)