@@ -38,6 +38,8 @@ class ControlConnection implements Host.StateListener, Connection.Owner {
38
38
39
39
private static final Logger logger = LoggerFactory .getLogger (ControlConnection .class );
40
40
41
// Gates the additional system.peers row checks (host_id, data_center, rack, tokens) performed by isValidPeer,
// and the matching columns reported by formatInvalidPeer. Read from the
// "com.datastax.driver.EXTENDED_PEER_CHECK" system property.
// NOTE(review): the second argument (true) is presumably the default when the property is unset - confirm
// against SystemProperties.getBoolean.
+ private static final boolean EXTENDED_PEER_CHECK = SystemProperties .getBoolean ("com.datastax.driver.EXTENDED_PEER_CHECK" , true );
42
+
41
43
private static final InetAddress bindAllAddress ;
42
44
43
45
static {
@@ -280,7 +282,7 @@ private Connection tryConnect(Host host, boolean isInitialConnection) throws Con
280
282
// We need to refresh the node list again;
281
283
// We want that because the token map was not properly initialized by the first call above,
282
284
// since it requires the list of keyspaces to be loaded.
283
- refreshNodeListAndTokenMap (connection , cluster , false , true );
285
+ refreshNodeListAndTokenMap (connection , cluster , false , false );
284
286
285
287
return connection ;
286
288
} catch (BusyConnectionException e ) {
@@ -421,19 +423,18 @@ void refreshNodeListAndTokenMap() {
421
423
}
422
424
}
423
425
424
- private static InetSocketAddress addressToUseForPeerHost (Row peersRow , InetSocketAddress connectedHost , Cluster .Manager cluster , boolean logMissingRpcAddresses ) {
426
+ private static InetSocketAddress addressToUseForPeerHost (Row peersRow , InetSocketAddress connectedHost , Cluster .Manager cluster ) {
425
427
InetAddress peer = peersRow .getInet ("peer" );
426
428
InetAddress addr = peersRow .getInet ("rpc_address" );
427
429
428
- if (peer .equals (connectedHost .getAddress ()) || (addr != null && addr .equals (connectedHost .getAddress ()))) {
430
+ // We've already called isValid on the row, which checks this
431
+ assert addr != null ;
432
+
433
+ if (peer .equals (connectedHost .getAddress ()) || addr .equals (connectedHost .getAddress ())) {
429
434
// Some DSE versions were inserting a line for the local node in peers (with mostly null values). This has been fixed, but if we
430
435
// detect that's the case, ignore it as it's not really a big deal.
431
436
logger .debug ("System.peers on node {} has a line for itself. This is not normal but is a known problem of some DSE version. Ignoring the entry." , connectedHost );
432
437
return null ;
433
- } else if (addr == null ) {
434
- if (logMissingRpcAddresses )
435
- logger .warn ("No rpc_address found for host {} in {}'s peers system table. {} will be ignored." , peer , connectedHost , peer );
436
- return null ;
437
438
} else if (addr .equals (bindAllAddress )) {
438
439
logger .warn ("Found host with 0.0.0.0 as rpc_address, using listen_address ({}) to contact it instead. If this is incorrect you should avoid the use of 0.0.0.0 server side." , peer );
439
440
addr = peer ;
@@ -462,7 +463,7 @@ private Row fetchNodeInfo(Host host, Connection c) throws ConnectionException, B
462
463
DefaultResultSetFuture future = new DefaultResultSetFuture (null , cluster .protocolVersion (), new Requests .Query (SELECT_PEERS ));
463
464
c .write (future );
464
465
for (Row row : future .get ()) {
465
- InetSocketAddress addr = addressToUseForPeerHost (row , c .address , cluster , true );
466
+ InetSocketAddress addr = addressToUseForPeerHost (row , c .address , cluster );
466
467
if (addr != null && addr .equals (host .getSocketAddress ()))
467
468
return row ;
468
469
}
@@ -492,11 +493,13 @@ boolean refreshNodeInfo(Host host) {
492
493
logger .warn ("No row found for host {} in {}'s peers system table. {} will be ignored." , host .getAddress (), c .address , host .getAddress ());
493
494
return false ;
494
495
}
495
- // Ignore hosts with a null rpc_address, as this is most likely a phantom row in system.peers (JAVA-428).
496
- // Don't test this for the control host since we're already connected to it anyway, and we read the info from system.local
497
- // which doesn't have an rpc_address column (JAVA-546).
498
- } else if (!c .address .equals (host .getSocketAddress ()) && row .getInet ("rpc_address" ) == null ) {
499
- logger .warn ("No rpc_address found for host {} in {}'s peers system table. {} will be ignored." , host .getAddress (), c .address , host .getAddress ());
496
+ }
497
+
498
+ // Ignore rows with invalid values, as this is most likely a phantom row in system.peers (JAVA-428,
499
+ // JAVA-852).
500
+ // Skip the control host since we're already connected to it anyway, and we read the info from system.local,
501
+ // which doesn't have an rpc_address column (JAVA-546).
502
+ if (!c .address .equals (host .getSocketAddress ()) && !isValidPeer (row , true )) {
500
503
return false ;
501
504
}
502
505
@@ -554,7 +557,7 @@ private static void updateLocationInfo(Host host, String datacenter, String rack
554
557
cluster .loadBalancingPolicy ().onAdd (host );
555
558
}
556
559
557
- private static void refreshNodeListAndTokenMap (Connection connection , Cluster .Manager cluster , boolean isInitialConnection , boolean logMissingRpcAddresses ) throws ConnectionException , BusyConnectionException , ExecutionException , InterruptedException {
560
+ private static void refreshNodeListAndTokenMap (Connection connection , Cluster .Manager cluster , boolean isInitialConnection , boolean logInvalidPeers ) throws ConnectionException , BusyConnectionException , ExecutionException , InterruptedException {
558
561
logger .debug ("[Control connection] Refreshing node list and token map" );
559
562
560
563
boolean metadataEnabled = cluster .configuration .getQueryOptions ().isMetadataEnabled ();
@@ -603,10 +606,10 @@ private static void refreshNodeListAndTokenMap(Connection connection, Cluster.Ma
603
606
List <Set <String >> allTokens = new ArrayList <Set <String >>();
604
607
605
608
for (Row row : peersFuture .get ()) {
606
- InetSocketAddress addr = addressToUseForPeerHost (row , connection .address , cluster , logMissingRpcAddresses );
607
- if (addr == null )
609
+ if (!isValidPeer (row , logInvalidPeers ))
608
610
continue ;
609
611
612
+ InetSocketAddress addr = addressToUseForPeerHost (row , connection .address , cluster );
610
613
foundHosts .add (addr );
611
614
dcs .add (row .getString ("data_center" ));
612
615
racks .add (row .getString ("rack" ));
@@ -654,6 +657,46 @@ private static void refreshNodeListAndTokenMap(Connection connection, Cluster.Ma
654
657
cluster .metadata .rebuildTokenMap (partitioner , tokenMap );
655
658
}
656
659
660
+ private static boolean isValidPeer (Row peerRow , boolean logIfInvalid ) {
661
+ boolean isValid = peerRow .getColumnDefinitions ().contains ("rpc_address" )
662
+ && !peerRow .isNull ("rpc_address" );
663
+ if (EXTENDED_PEER_CHECK ) {
664
+ isValid &= peerRow .getColumnDefinitions ().contains ("host_id" )
665
+ && !peerRow .isNull ("host_id" )
666
+ && peerRow .getColumnDefinitions ().contains ("data_center" )
667
+ && !peerRow .isNull ("data_center" )
668
+ && peerRow .getColumnDefinitions ().contains ("rack" )
669
+ && !peerRow .isNull ("rack" )
670
+ && peerRow .getColumnDefinitions ().contains ("tokens" )
671
+ && !peerRow .isNull ("tokens" );
672
+ }
673
+ if (!isValid && logIfInvalid )
674
+ logger .warn ("Found invalid row in system.peers: {}. " +
675
+ "This is likely a gossip or snitch issue, this host will be ignored." , formatInvalidPeer (peerRow ));
676
+ return isValid ;
677
+ }
678
+
679
+ // Custom formatting to avoid spamming the logs if 'tokens' is present and contains a gazillion tokens
680
+ private static String formatInvalidPeer (Row peerRow ) {
681
+ StringBuilder sb = new StringBuilder ("[peer=" + peerRow .getInet ("peer" ));
682
+ formatMissingOrNullColumn (peerRow , "rpc_address" , sb );
683
+ if (EXTENDED_PEER_CHECK ) {
684
+ formatMissingOrNullColumn (peerRow , "host_id" , sb );
685
+ formatMissingOrNullColumn (peerRow , "data_center" , sb );
686
+ formatMissingOrNullColumn (peerRow , "rack" , sb );
687
+ formatMissingOrNullColumn (peerRow , "tokens" , sb );
688
+ }
689
+ sb .append ("]" );
690
+ return sb .toString ();
691
+ }
692
+
693
+ private static void formatMissingOrNullColumn (Row peerRow , String columnName , StringBuilder sb ) {
694
+ if (!peerRow .getColumnDefinitions ().contains (columnName ))
695
+ sb .append (", missing " ).append (columnName );
696
+ else if (peerRow .isNull (columnName ))
697
+ sb .append (", " ).append (columnName ).append ("=null" );
698
+ }
699
+
657
700
boolean waitForSchemaAgreement () throws ConnectionException , BusyConnectionException , ExecutionException , InterruptedException {
658
701
long start = System .nanoTime ();
659
702
long elapsed = 0 ;
@@ -690,7 +733,10 @@ boolean checkSchemaAgreement() throws ConnectionException, BusyConnectionExcepti
690
733
691
734
for (Row row : peersFuture .get ()) {
692
735
693
- InetSocketAddress addr = addressToUseForPeerHost (row , connection .address , cluster , true );
736
+ if (!isValidPeer (row , false ))
737
+ continue ;
738
+
739
+ InetSocketAddress addr = addressToUseForPeerHost (row , connection .address , cluster );
694
740
if (addr == null || row .isNull ("schema_version" ))
695
741
continue ;
696
742
0 commit comments