From 8db1f4db53797e6ad8239538e17091904d01f0f4 Mon Sep 17 00:00:00 2001 From: bgrozev Date: Wed, 13 Feb 2019 18:13:15 +0000 Subject: [PATCH] fix: Fixes a deadlock which we observed. (#345) The call reInviteParticipants triggered by an ICE failure notification resulted in a synchronous call to expireChannels (while holding the lock on #participantLock), followed by an attempt to lock #bridges in inviteParticipant. This fix makes sure that we never lock #bridges first and then #participants. =================================================== "pool-1-thread-1": at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipants(JitsiMeetConferenceImpl.java:2426) - waiting to lock <0x0000000700ece2b8> (a java.lang.Object) at org.jitsi.jicofo.JitsiMeetConferenceImpl.onBridgeDown(JitsiMeetConferenceImpl.java:2295) - locked <0x0000000700ece758> (a java.util.LinkedList) at org.jitsi.jicofo.JitsiMeetConferenceImpl.handleEvent(JitsiMeetConferenceImpl.java:2235) at org.jitsi.eventadmin.EventAdminImpl.callEventHandler(EventAdminImpl.java:207) at org.jitsi.eventadmin.EventAdminImpl.access$000(EventAdminImpl.java:34) at org.jitsi.eventadmin.EventAdminImpl$1.run(EventAdminImpl.java:187) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) "Smack-Single Threaded Executor 0 (0)": at org.jitsi.jicofo.JitsiMeetConferenceImpl.inviteParticipant(JitsiMeetConferenceImpl.java:798) - waiting to lock <0x0000000700ece758> (a java.util.LinkedList) at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipants(JitsiMeetConferenceImpl.java:2448) - locked <0x0000000700ece2b8> (a java.lang.Object) at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipant(JitsiMeetConferenceImpl.java:2414) at org.jitsi.jicofo.JitsiMeetConferenceImpl.onSessionInfo(JitsiMeetConferenceImpl.java:1543) at org.jitsi.protocol.xmpp.AbstractOperationSetJingle.processJingleIQ(AbstractOperationSetJingle.java:349) at org.jitsi.protocol.xmpp.AbstractOperationSetJingle.handleIQRequest(AbstractOperationSetJingle.java:78) at org.jivesoftware.smack.AbstractXMPPConnection$3.run(AbstractXMPPConnection.java:1154) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) --- .../jitsi/jicofo/JitsiMeetConferenceImpl.java | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/jitsi/jicofo/JitsiMeetConferenceImpl.java b/src/main/java/org/jitsi/jicofo/JitsiMeetConferenceImpl.java index 1c26b46856..dc2c65f0e6 100644 --- a/src/main/java/org/jitsi/jicofo/JitsiMeetConferenceImpl.java +++ b/src/main/java/org/jitsi/jicofo/JitsiMeetConferenceImpl.java @@ -51,6 +51,18 @@ * participants, as well as the COLIBRI session with the jitsi-videobridge * instances used for the conference. * + * A note on synchronization: this class uses a lot of 'synchronized' blocks, + * on 4 different objects ({@link #bridges}, {@link #participantLock}, + * {@code this} and {@link BridgeSession#octoParticipant}). At the time of this + * writing it seems that multiple locks are acquired only in the following + * orders: + * {@code participantsLock} -> {@code bridges}, and + * {@code participantsLock} -> {@code this} -> {@code bridges}. + * + * This seems safe, but it is hard to maintain this way, and we should + * re-factor to simplify. + * + * * @author Pawel Domas * @author Boris Grozev */ @@ -151,6 +163,11 @@ public class JitsiMeetConferenceImpl /** * This lock is used to synchronise write access to {@link #participants}. + * + * WARNING: To avoid deadlocks we must make sure that any code paths that + * lock both {@link #bridges} and {@link #participantLock} does so in the + * correct order. The lock on {@link #participantLock} must be acquired + * first. */ private final Object participantLock = new Object(); @@ -228,6 +245,12 @@ public class JitsiMeetConferenceImpl /** * The list of {@link BridgeSession} currently in use by this conference. + * + * WARNING: To avoid deadlocks we must make sure that any code paths that + * lock both {@link #bridges} and {@link #participantLock} does so in the + * correct order. The lock on {@link #participantLock} must be acquired + * first. + * */ private final List bridges = new LinkedList<>(); @@ -2273,6 +2296,8 @@ private void onBridgeUp(Jid bridgeJid) */ void onBridgeDown(Jid bridgeJid) { + List participantsToReinvite = Collections.EMPTY_LIST; + synchronized (bridges) { BridgeSession bridgeSession = findBridgeSession(bridgeJid); @@ -2282,8 +2307,7 @@ void onBridgeDown(Jid bridgeJid) // Note: the Jingle sessions are still alive, we'll just // (try to) move to a new bridge and send transport-replace. - List participantsToReinvite - = bridgeSession.terminateAll(); + participantsToReinvite = bridgeSession.terminateAll(); bridges.remove(bridgeSession); setConferenceProperty( @@ -2291,10 +2315,13 @@ void onBridgeDown(Jid bridgeJid) Integer.toString(bridges.size())); updateOctoRelays(); - - reInviteParticipants(participantsToReinvite); } } + + if (!participantsToReinvite.isEmpty()) + { + reInviteParticipants(participantsToReinvite); + } } /** @@ -2707,7 +2734,6 @@ private List terminateAll() public boolean terminate(AbstractParticipant participant, boolean syncExpire) { - //TODO synchronize? boolean octo = participant == this.octoParticipant; boolean removed = octo || participants.remove(participant); if (removed)