@@ -769,7 +769,7 @@ public void testMessyElectionsStillMakeClusterGoGreen() throws Exception {
769
769
ensureGreen ("test" );
770
770
}
771
771
772
- public void testBatchModeEnabled () throws Exception {
772
+ public void testBatchModeEnabledWithoutTimeout () throws Exception {
773
773
internalCluster ().startClusterManagerOnlyNodes (
774
774
1 ,
775
775
Settings .builder ().put (ExistingShardsAllocator .EXISTING_SHARDS_ALLOCATOR_BATCH_MODE .getKey (), true ).build ()
@@ -810,6 +810,132 @@ public void testBatchModeEnabled() throws Exception {
810
810
assertEquals (0 , gatewayAllocator .getNumberOfInFlightFetches ());
811
811
}
812
812
813
+ public void testBatchModeEnabledWithSufficientTimeoutAndClusterGreen () throws Exception {
814
+ internalCluster ().startClusterManagerOnlyNodes (
815
+ 1 ,
816
+ Settings .builder ()
817
+ .put (ExistingShardsAllocator .EXISTING_SHARDS_ALLOCATOR_BATCH_MODE .getKey (), true )
818
+ .put (ShardsBatchGatewayAllocator .PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING .getKey (), "20s" )
819
+ .put (ShardsBatchGatewayAllocator .REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING .getKey (), "20s" )
820
+ .build ()
821
+ );
822
+ List <String > dataOnlyNodes = internalCluster ().startDataOnlyNodes (2 );
823
+ createIndex (
824
+ "test" ,
825
+ Settings .builder ().put (IndexMetadata .SETTING_NUMBER_OF_SHARDS , 1 ).put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 1 ).build ()
826
+ );
827
+ ensureGreen ("test" );
828
+ Settings node0DataPathSettings = internalCluster ().dataPathSettings (dataOnlyNodes .get (0 ));
829
+ Settings node1DataPathSettings = internalCluster ().dataPathSettings (dataOnlyNodes .get (1 ));
830
+ internalCluster ().stopRandomNode (InternalTestCluster .nameFilter (dataOnlyNodes .get (0 )));
831
+ internalCluster ().stopRandomNode (InternalTestCluster .nameFilter (dataOnlyNodes .get (1 )));
832
+ ensureRed ("test" );
833
+ ensureStableCluster (1 );
834
+
835
+ logger .info ("--> Now do a protective reroute" );
836
+ ClusterRerouteResponse clusterRerouteResponse = client ().admin ().cluster ().prepareReroute ().setRetryFailed (true ).get ();
837
+ assertTrue (clusterRerouteResponse .isAcknowledged ());
838
+
839
+ ShardsBatchGatewayAllocator gatewayAllocator = internalCluster ().getInstance (
840
+ ShardsBatchGatewayAllocator .class ,
841
+ internalCluster ().getClusterManagerName ()
842
+ );
843
+ assertTrue (ExistingShardsAllocator .EXISTING_SHARDS_ALLOCATOR_BATCH_MODE .get (internalCluster ().clusterService ().getSettings ()));
844
+ assertEquals (1 , gatewayAllocator .getNumberOfStartedShardBatches ());
845
+ assertEquals (1 , gatewayAllocator .getNumberOfStoreShardBatches ());
846
+
847
+ // Now start both data nodes and ensure batch mode is working
848
+ logger .info ("--> restarting the stopped nodes" );
849
+ internalCluster ().startDataOnlyNode (Settings .builder ().put ("node.name" , dataOnlyNodes .get (0 )).put (node0DataPathSettings ).build ());
850
+ internalCluster ().startDataOnlyNode (Settings .builder ().put ("node.name" , dataOnlyNodes .get (1 )).put (node1DataPathSettings ).build ());
851
+ ensureStableCluster (3 );
852
+ ensureGreen ("test" );
853
+ assertEquals (0 , gatewayAllocator .getNumberOfStartedShardBatches ());
854
+ assertEquals (0 , gatewayAllocator .getNumberOfStoreShardBatches ());
855
+ assertEquals (0 , gatewayAllocator .getNumberOfInFlightFetches ());
856
+ }
857
+
858
+ public void testBatchModeEnabledWithInSufficientTimeoutButClusterGreen () throws Exception {
859
+
860
+ internalCluster ().startClusterManagerOnlyNodes (
861
+ 1 ,
862
+ Settings .builder ().put (ExistingShardsAllocator .EXISTING_SHARDS_ALLOCATOR_BATCH_MODE .getKey (), true ).build ()
863
+ );
864
+ List <String > dataOnlyNodes = internalCluster ().startDataOnlyNodes (2 );
865
+ createNIndices (50 , "test" ); // this will create 50p, 50r shards
866
+ ensureStableCluster (3 );
867
+ IndicesStatsResponse indicesStats = dataNodeClient ().admin ().indices ().prepareStats ().get ();
868
+ assertThat (indicesStats .getSuccessfulShards (), equalTo (100 ));
869
+ ClusterHealthResponse health = client ().admin ()
870
+ .cluster ()
871
+ .health (Requests .clusterHealthRequest ().waitForGreenStatus ().timeout ("1m" ))
872
+ .actionGet ();
873
+ assertFalse (health .isTimedOut ());
874
+ assertEquals (GREEN , health .getStatus ());
875
+
876
+ String clusterManagerName = internalCluster ().getClusterManagerName ();
877
+ Settings clusterManagerDataPathSettings = internalCluster ().dataPathSettings (clusterManagerName );
878
+ Settings node0DataPathSettings = internalCluster ().dataPathSettings (dataOnlyNodes .get (0 ));
879
+ Settings node1DataPathSettings = internalCluster ().dataPathSettings (dataOnlyNodes .get (1 ));
880
+
881
+ internalCluster ().stopCurrentClusterManagerNode ();
882
+ internalCluster ().stopRandomNode (InternalTestCluster .nameFilter (dataOnlyNodes .get (0 )));
883
+ internalCluster ().stopRandomNode (InternalTestCluster .nameFilter (dataOnlyNodes .get (1 )));
884
+
885
+ // Now start cluster manager node and post that verify batches created
886
+ internalCluster ().startClusterManagerOnlyNodes (
887
+ 1 ,
888
+ Settings .builder ()
889
+ .put ("node.name" , clusterManagerName )
890
+ .put (clusterManagerDataPathSettings )
891
+ .put (ShardsBatchGatewayAllocator .GATEWAY_ALLOCATOR_BATCH_SIZE .getKey (), 5 )
892
+ .put (ShardsBatchGatewayAllocator .PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING .getKey (), "10ms" )
893
+ .put (ShardsBatchGatewayAllocator .REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING .getKey (), "10ms" )
894
+ .put (ExistingShardsAllocator .EXISTING_SHARDS_ALLOCATOR_BATCH_MODE .getKey (), true )
895
+ .build ()
896
+ );
897
+ ensureStableCluster (1 );
898
+
899
+ logger .info ("--> Now do a protective reroute" ); // to avoid any race condition in test
900
+ ClusterRerouteResponse clusterRerouteResponse = client ().admin ().cluster ().prepareReroute ().setRetryFailed (true ).get ();
901
+ assertTrue (clusterRerouteResponse .isAcknowledged ());
902
+
903
+ ShardsBatchGatewayAllocator gatewayAllocator = internalCluster ().getInstance (
904
+ ShardsBatchGatewayAllocator .class ,
905
+ internalCluster ().getClusterManagerName ()
906
+ );
907
+
908
+ assertTrue (ExistingShardsAllocator .EXISTING_SHARDS_ALLOCATOR_BATCH_MODE .get (internalCluster ().clusterService ().getSettings ()));
909
+ assertEquals (10 , gatewayAllocator .getNumberOfStartedShardBatches ());
910
+ assertEquals (10 , gatewayAllocator .getNumberOfStoreShardBatches ());
911
+ health = client (internalCluster ().getClusterManagerName ()).admin ().cluster ().health (Requests .clusterHealthRequest ()).actionGet ();
912
+ assertFalse (health .isTimedOut ());
913
+ assertEquals (RED , health .getStatus ());
914
+ assertEquals (100 , health .getUnassignedShards ());
915
+ assertEquals (0 , health .getInitializingShards ());
916
+ assertEquals (0 , health .getActiveShards ());
917
+ assertEquals (0 , health .getRelocatingShards ());
918
+ assertEquals (0 , health .getNumberOfDataNodes ());
919
+
920
+ // Now start both data nodes and ensure batch mode is working
921
+ logger .info ("--> restarting the stopped nodes" );
922
+ internalCluster ().startDataOnlyNode (Settings .builder ().put ("node.name" , dataOnlyNodes .get (0 )).put (node0DataPathSettings ).build ());
923
+ internalCluster ().startDataOnlyNode (Settings .builder ().put ("node.name" , dataOnlyNodes .get (1 )).put (node1DataPathSettings ).build ());
924
+ ensureStableCluster (3 );
925
+
926
+ // wait for cluster to turn green
927
+ health = client ().admin ().cluster ().health (Requests .clusterHealthRequest ().waitForGreenStatus ().timeout ("5m" )).actionGet ();
928
+ assertFalse (health .isTimedOut ());
929
+ assertEquals (GREEN , health .getStatus ());
930
+ assertEquals (0 , health .getUnassignedShards ());
931
+ assertEquals (0 , health .getInitializingShards ());
932
+ assertEquals (100 , health .getActiveShards ());
933
+ assertEquals (0 , health .getRelocatingShards ());
934
+ assertEquals (2 , health .getNumberOfDataNodes ());
935
+ assertEquals (0 , gatewayAllocator .getNumberOfStartedShardBatches ());
936
+ assertEquals (0 , gatewayAllocator .getNumberOfStoreShardBatches ());
937
+ }
938
+
813
939
public void testBatchModeDisabled () throws Exception {
814
940
internalCluster ().startClusterManagerOnlyNodes (
815
941
1 ,
0 commit comments