Skip to content

Commit 66bc7ea

Browse files
authored
HDDS-12535. Intermittent failure in TestContainerReportHandling (#8060)
1 parent 87a674c commit 66bc7ea

File tree

3 files changed

+37
-35
lines changed

3 files changed

+37
-35
lines changed

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java

+5-13
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@
2323
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
2424
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
2525
import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose;
26+
import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
2627
import static org.assertj.core.api.Assertions.assertThat;
2728
import static org.junit.jupiter.api.Assertions.assertEquals;
2829
import static org.junit.jupiter.api.Assertions.assertNotNull;
2930
import static org.junit.jupiter.api.Assertions.assertTrue;
30-
import static org.junit.jupiter.api.Assertions.fail;
3131

3232
import java.io.IOException;
3333
import java.io.OutputStream;
@@ -41,7 +41,6 @@
4141
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
4242
import org.apache.hadoop.hdds.scm.container.ContainerID;
4343
import org.apache.hadoop.hdds.scm.container.ContainerManager;
44-
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
4544
import org.apache.hadoop.ozone.HddsDatanodeService;
4645
import org.apache.hadoop.ozone.MiniOzoneCluster;
4746
import org.apache.hadoop.ozone.client.ObjectStore;
@@ -52,15 +51,12 @@
5251
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
5352
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
5453
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
55-
import org.apache.ozone.test.GenericTestUtils;
56-
import org.apache.ozone.test.tag.Flaky;
5754
import org.junit.jupiter.params.ParameterizedTest;
5855
import org.junit.jupiter.params.provider.EnumSource;
5956

6057
/**
6158
* Tests for container report handling.
6259
*/
63-
@Flaky("HDDS-12535")
6460
public class TestContainerReportHandling {
6561
private static final String VOLUME = "vol1";
6662
private static final String BUCKET = "bucket1";
@@ -97,6 +93,9 @@ void testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
9793
ContainerID containerID = ContainerID.valueOf(keyLocation.getContainerID());
9894
waitForContainerClose(cluster, containerID.getId());
9995

96+
// also wait till the container is closed in SCM
97+
waitForContainerStateInSCM(cluster.getStorageContainerManager(), containerID, HddsProtos.LifeCycleState.CLOSED);
98+
10099
// move the container to DELETING
101100
ContainerManager containerManager = cluster.getStorageContainerManager().getContainerManager();
102101
containerManager.updateContainerState(containerID, HddsProtos.LifeCycleEvent.DELETE);
@@ -111,14 +110,7 @@ void testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
111110
// restart a DN and wait for the container to get CLOSED.
112111
HddsDatanodeService dn = cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
113112
cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
114-
GenericTestUtils.waitFor(() -> {
115-
try {
116-
return containerManager.getContainer(containerID).getState() == HddsProtos.LifeCycleState.CLOSED;
117-
} catch (ContainerNotFoundException e) {
118-
fail(e);
119-
}
120-
return false;
121-
}, 2000, 20000);
113+
waitForContainerStateInSCM(cluster.getStorageContainerManager(), containerID, HddsProtos.LifeCycleState.CLOSED);
122114

123115
assertEquals(HddsProtos.LifeCycleState.CLOSED, containerManager.getContainer(containerID).getState());
124116
}

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java

+15-22
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,26 @@
2323
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
2424
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
2525
import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose;
26+
import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
2627
import static org.assertj.core.api.Assertions.assertThat;
2728
import static org.junit.jupiter.api.Assertions.assertEquals;
2829
import static org.junit.jupiter.api.Assertions.assertNotNull;
2930
import static org.junit.jupiter.api.Assertions.assertTrue;
30-
import static org.junit.jupiter.api.Assertions.fail;
3131

3232
import java.io.IOException;
3333
import java.io.OutputStream;
3434
import java.nio.file.Path;
3535
import java.nio.file.Paths;
3636
import java.util.List;
3737
import java.util.concurrent.TimeUnit;
38+
import java.util.concurrent.TimeoutException;
3839
import org.apache.hadoop.fs.FileUtil;
3940
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
4041
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
4142
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
4243
import org.apache.hadoop.hdds.scm.container.ContainerID;
4344
import org.apache.hadoop.hdds.scm.container.ContainerManager;
44-
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
45+
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
4546
import org.apache.hadoop.ozone.HddsDatanodeService;
4647
import org.apache.hadoop.ozone.MiniOzoneCluster;
4748
import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
@@ -53,15 +54,12 @@
5354
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
5455
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
5556
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
56-
import org.apache.ozone.test.GenericTestUtils;
57-
import org.apache.ozone.test.tag.Flaky;
5857
import org.junit.jupiter.params.ParameterizedTest;
5958
import org.junit.jupiter.params.provider.EnumSource;
6059

6160
/**
6261
* Tests for container report handling with SCM High Availability.
6362
*/
64-
@Flaky("HDDS-12535")
6563
public class TestContainerReportHandlingWithHA {
6664
private static final String VOLUME = "vol1";
6765
private static final String BUCKET = "bucket1";
@@ -99,6 +97,8 @@ void testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
9997
ContainerID containerID = ContainerID.valueOf(keyLocation.getContainerID());
10098
waitForContainerClose(cluster, containerID.getId());
10199

100+
waitForContainerStateInAllSCMs(cluster, containerID, HddsProtos.LifeCycleState.CLOSED);
101+
102102
// move the container to DELETING
103103
ContainerManager containerManager = cluster.getScmLeader().getContainerManager();
104104
containerManager.updateContainerState(containerID, HddsProtos.LifeCycleEvent.DELETE);
@@ -113,23 +113,8 @@ void testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
113113
// restart a DN and wait for the container to get CLOSED in all SCMs
114114
HddsDatanodeService dn = cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
115115
cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
116-
ContainerManager[] array = new ContainerManager[numSCM];
117-
for (int i = 0; i < numSCM; i++) {
118-
array[i] = cluster.getStorageContainerManager(i).getContainerManager();
119-
}
120-
GenericTestUtils.waitFor(() -> {
121-
try {
122-
for (ContainerManager manager : array) {
123-
if (manager.getContainer(containerID).getState() != HddsProtos.LifeCycleState.CLOSED) {
124-
return false;
125-
}
126-
}
127-
return true;
128-
} catch (ContainerNotFoundException e) {
129-
fail(e);
130-
}
131-
return false;
132-
}, 2000, 20000);
116+
117+
waitForContainerStateInAllSCMs(cluster, containerID, HddsProtos.LifeCycleState.CLOSED);
133118

134119
assertEquals(HddsProtos.LifeCycleState.CLOSED, containerManager.getContainer(containerID).getState());
135120
}
@@ -177,4 +162,12 @@ private void createTestData(OzoneClient client) throws IOException {
177162
}
178163
}
179164

165+
private static void waitForContainerStateInAllSCMs(MiniOzoneHAClusterImpl cluster, ContainerID containerID,
166+
HddsProtos.LifeCycleState desiredState)
167+
throws TimeoutException, InterruptedException {
168+
for (StorageContainerManager scm : cluster.getStorageContainerManagersList()) {
169+
waitForContainerStateInSCM(scm, containerID, desiredState);
170+
}
171+
}
172+
180173
}

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java

+17
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import static org.junit.jupiter.api.Assertions.assertFalse;
2525
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
2626
import static org.junit.jupiter.api.Assertions.assertTrue;
27+
import static org.junit.jupiter.api.Assertions.fail;
2728

2829
import java.io.IOException;
2930
import java.security.MessageDigest;
@@ -455,4 +456,20 @@ public static void setConfig(OzoneConfiguration conf, String key, String value)
455456
conf.set(key, value);
456457
}
457458
}
459+
460+
public static void waitForContainerStateInSCM(StorageContainerManager scm,
461+
ContainerID containerID, HddsProtos.LifeCycleState expectedState)
462+
throws TimeoutException, InterruptedException {
463+
ContainerManager containerManager = scm.getContainerManager();
464+
GenericTestUtils.waitFor(() -> {
465+
try {
466+
return containerManager.getContainer(containerID).getState() == expectedState;
467+
} catch (ContainerNotFoundException e) {
468+
LOG.error("Container {} not found while waiting for state {}",
469+
containerID, expectedState, e);
470+
fail("Container " + containerID + " not found while waiting for state " + expectedState + ": " + e);
471+
return false;
472+
}
473+
}, 2000, 20000);
474+
}
458475
}

0 commit comments

Comments
 (0)