Skip to content

Commit

Permalink
fix: Fixes a deadlock which we observed. (#345)
Browse files Browse the repository at this point in the history
The call reInviteParticipants triggered by an ICE failure notification
resulted in a synchronous call to expireChannels (while holding the lock
on #participantLock), followed by an attempt to lock #bridges in
inviteParticipant. This fix makes sure that we never lock #bridges first
and then #participants.

 ===================================================
 "pool-1-thread-1":
     at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipants(JitsiMeetConferenceImpl.java:2426)
     - waiting to lock <0x0000000700ece2b8> (a java.lang.Object)
     at org.jitsi.jicofo.JitsiMeetConferenceImpl.onBridgeDown(JitsiMeetConferenceImpl.java:2295)
     - locked <0x0000000700ece758> (a java.util.LinkedList)
     at org.jitsi.jicofo.JitsiMeetConferenceImpl.handleEvent(JitsiMeetConferenceImpl.java:2235)
     at org.jitsi.eventadmin.EventAdminImpl.callEventHandler(EventAdminImpl.java:207)
     at org.jitsi.eventadmin.EventAdminImpl.access$000(EventAdminImpl.java:34)
     at org.jitsi.eventadmin.EventAdminImpl$1.run(EventAdminImpl.java:187)
     at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
     at java.lang.Thread.run(Thread.java:748)
 "Smack-Single Threaded Executor 0 (0)":
     at org.jitsi.jicofo.JitsiMeetConferenceImpl.inviteParticipant(JitsiMeetConferenceImpl.java:798)
     - waiting to lock <0x0000000700ece758> (a java.util.LinkedList)
     at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipants(JitsiMeetConferenceImpl.java:2448)
     - locked <0x0000000700ece2b8> (a java.lang.Object)
     at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipant(JitsiMeetConferenceImpl.java:2414)
     at org.jitsi.jicofo.JitsiMeetConferenceImpl.onSessionInfo(JitsiMeetConferenceImpl.java:1543)
     at org.jitsi.protocol.xmpp.AbstractOperationSetJingle.processJingleIQ(AbstractOperationSetJingle.java:349)
     at org.jitsi.protocol.xmpp.AbstractOperationSetJingle.handleIQRequest(AbstractOperationSetJingle.java:78)
     at org.jivesoftware.smack.AbstractXMPPConnection$3.run(AbstractXMPPConnection.java:1154)
     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
     at java.lang.Thread.run(Thread.java:748)
  • Loading branch information
bgrozev authored Feb 13, 2019
1 parent 614a381 commit 8db1f4d
Showing 1 changed file with 31 additions and 5 deletions.
36 changes: 31 additions & 5 deletions src/main/java/org/jitsi/jicofo/JitsiMeetConferenceImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@
* participants, as well as the COLIBRI session with the jitsi-videobridge
* instances used for the conference.
*
* A note on synchronization: this class uses a lot of 'synchronized' blocks,
* on 4 different objects ({@link #bridges}, {@link #participantLock},
* {@code this} and {@link BridgeSession#octoParticipant}). At the time of this
* writing it seems that multiple locks are acquired only in the following
* orders:
* {@code participantsLock} -> {@code bridges}, and
* {@code participantsLock} -> {@code this} -> {@code bridges}.
*
* This seems safe, but it is hard to maintain this way, and we should
* re-factor to simplify.
*
*
* @author Pawel Domas
* @author Boris Grozev
*/
Expand Down Expand Up @@ -151,6 +163,11 @@ public class JitsiMeetConferenceImpl

/**
* This lock is used to synchronise write access to {@link #participants}.
*
* WARNING: To avoid deadlocks we must make sure that any code paths that
* lock both {@link #bridges} and {@link #participantLock} does so in the
* correct order. The lock on {@link #participantLock} must be acquired
* first.
*/
private final Object participantLock = new Object();

Expand Down Expand Up @@ -228,6 +245,12 @@ public class JitsiMeetConferenceImpl

/**
* The list of {@link BridgeSession} currently in use by this conference.
*
* WARNING: To avoid deadlocks we must make sure that any code paths that
* lock both {@link #bridges} and {@link #participantLock} does so in the
* correct order. The lock on {@link #participantLock} must be acquired
* first.
*
*/
private final List<BridgeSession> bridges = new LinkedList<>();

Expand Down Expand Up @@ -2273,6 +2296,8 @@ private void onBridgeUp(Jid bridgeJid)
*/
void onBridgeDown(Jid bridgeJid)
{
List<Participant> participantsToReinvite = Collections.EMPTY_LIST;

synchronized (bridges)
{
BridgeSession bridgeSession = findBridgeSession(bridgeJid);
Expand All @@ -2282,19 +2307,21 @@ void onBridgeDown(Jid bridgeJid)

// Note: the Jingle sessions are still alive, we'll just
// (try to) move to a new bridge and send transport-replace.
List<Participant> participantsToReinvite
= bridgeSession.terminateAll();
participantsToReinvite = bridgeSession.terminateAll();

bridges.remove(bridgeSession);
setConferenceProperty(
ConferenceProperties.KEY_BRIDGE_COUNT,
Integer.toString(bridges.size()));

updateOctoRelays();

reInviteParticipants(participantsToReinvite);
}
}

if (!participantsToReinvite.isEmpty())
{
reInviteParticipants(participantsToReinvite);
}
}

/**
Expand Down Expand Up @@ -2707,7 +2734,6 @@ private List<Participant> terminateAll()
public boolean terminate(AbstractParticipant participant,
boolean syncExpire)
{
//TODO synchronize?
boolean octo = participant == this.octoParticipant;
boolean removed = octo || participants.remove(participant);
if (removed)
Expand Down

0 comments on commit 8db1f4d

Please sign in to comment.