Skip to content

Commit 74d61a4

Browse files
authored
Retry when the server can't be resolved (#123852)
1 parent 076f61d commit 74d61a4

File tree

4 files changed

+162
-6
lines changed

4 files changed

+162
-6
lines changed

docs/changelog/123852.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 123852
2+
summary: Retry when the server can't be resolved (Google Cloud Storage)
3+
area: Snapshot/Restore
4+
type: enhancement
5+
issues: []

modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageService.java

+17-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import com.google.api.client.http.HttpTransport;
1515
import com.google.api.client.http.javanet.NetHttpTransport;
1616
import com.google.api.client.util.SecurityUtils;
17+
import com.google.api.gax.retrying.ResultRetryAlgorithm;
1718
import com.google.auth.oauth2.GoogleCredentials;
1819
import com.google.auth.oauth2.ServiceAccountCredentials;
1920
import com.google.cloud.ServiceOptions;
@@ -24,6 +25,7 @@
2425

2526
import org.apache.logging.log4j.LogManager;
2627
import org.apache.logging.log4j.Logger;
28+
import org.elasticsearch.ExceptionsHelper;
2729
import org.elasticsearch.cluster.node.DiscoveryNode;
2830
import org.elasticsearch.common.Strings;
2931
import org.elasticsearch.common.blobstore.OperationPurpose;
@@ -41,6 +43,7 @@
4143
import java.net.Proxy;
4244
import java.net.URI;
4345
import java.net.URL;
46+
import java.net.UnknownHostException;
4447
import java.security.KeyStore;
4548
import java.util.Map;
4649
import java.util.stream.Collectors;
@@ -207,7 +210,7 @@ StorageOptions createStorageOptions(
207210
final HttpTransportOptions httpTransportOptions
208211
) {
209212
final StorageOptions.Builder storageOptionsBuilder = StorageOptions.newBuilder()
210-
.setStorageRetryStrategy(StorageRetryStrategy.getLegacyStorageRetryStrategy())
213+
.setStorageRetryStrategy(getRetryStrategy())
211214
.setTransportOptions(httpTransportOptions)
212215
.setHeaderProvider(() -> {
213216
return Strings.hasLength(gcsClientSettings.getApplicationName())
@@ -264,6 +267,19 @@ StorageOptions createStorageOptions(
264267
return storageOptionsBuilder.build();
265268
}
266269

270+
protected StorageRetryStrategy getRetryStrategy() {
271+
return ShouldRetryDecorator.decorate(
272+
StorageRetryStrategy.getLegacyStorageRetryStrategy(),
273+
(Throwable prevThrowable, Object prevResponse, ResultRetryAlgorithm<Object> delegate) -> {
274+
// Retry in the event of an unknown host exception
275+
if (ExceptionsHelper.unwrap(prevThrowable, UnknownHostException.class) != null) {
276+
return true;
277+
}
278+
return delegate.shouldRetry(prevThrowable, prevResponse);
279+
}
280+
);
281+
}
282+
267283
/**
268284
* This method imitates what MetadataConfig.getProjectId() does, but does not have the endpoint hardcoded.
269285
*/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.repositories.gcs;
11+
12+
import com.google.api.gax.retrying.ResultRetryAlgorithm;
13+
import com.google.api.gax.retrying.TimedAttemptSettings;
14+
import com.google.cloud.storage.StorageRetryStrategy;
15+
16+
import java.util.concurrent.CancellationException;
17+
18+
public class ShouldRetryDecorator<T> implements StorageRetryStrategy {
19+
20+
private final ResultRetryAlgorithm<T> idempotentRetryAlgorithm;
21+
private final ResultRetryAlgorithm<T> nonIdempotentRetryAlgorithm;
22+
23+
/**
24+
* Decorate the should-retry logic for the specified {@link StorageRetryStrategy}
25+
*
26+
* @param delegate The underling storage retry strategy
27+
* @param shouldRetryDecorator The decorated behaviour of {@link ResultRetryAlgorithm#shouldRetry(Throwable, Object)}
28+
* @return A decorated {@link StorageRetryStrategy}
29+
*/
30+
public static StorageRetryStrategy decorate(StorageRetryStrategy delegate, Decorator<?> shouldRetryDecorator) {
31+
return new ShouldRetryDecorator<>(delegate, shouldRetryDecorator);
32+
}
33+
34+
/**
35+
* The logic to use for {@link ResultRetryAlgorithm#shouldRetry(Throwable, Object)}
36+
*/
37+
public interface Decorator<R> {
38+
boolean shouldRetry(Throwable prevThrowable, R prevResponse, ResultRetryAlgorithm<R> delegate);
39+
}
40+
41+
/**
42+
* @param delegate The delegate {@link StorageRetryStrategy}
43+
* @param shouldRetryDecorator The function to call for shouldRetry for idempotent and non-idempotent requests
44+
*/
45+
@SuppressWarnings("unchecked")
46+
private ShouldRetryDecorator(StorageRetryStrategy delegate, Decorator<T> shouldRetryDecorator) {
47+
this.idempotentRetryAlgorithm = new DelegatingResultRetryAlgorithm<>(
48+
(ResultRetryAlgorithm<T>) delegate.getIdempotentHandler(),
49+
shouldRetryDecorator
50+
);
51+
this.nonIdempotentRetryAlgorithm = new DelegatingResultRetryAlgorithm<>(
52+
(ResultRetryAlgorithm<T>) delegate.getNonidempotentHandler(),
53+
shouldRetryDecorator
54+
);
55+
}
56+
57+
@Override
58+
public ResultRetryAlgorithm<?> getIdempotentHandler() {
59+
return idempotentRetryAlgorithm;
60+
}
61+
62+
@Override
63+
public ResultRetryAlgorithm<?> getNonidempotentHandler() {
64+
return nonIdempotentRetryAlgorithm;
65+
}
66+
67+
private record DelegatingResultRetryAlgorithm<R>(ResultRetryAlgorithm<R> delegate, Decorator<R> shouldRetryDecorator)
68+
implements
69+
ResultRetryAlgorithm<R> {
70+
71+
@Override
72+
public TimedAttemptSettings createNextAttempt(Throwable prevThrowable, R prevResponse, TimedAttemptSettings prevSettings) {
73+
return delegate.createNextAttempt(prevThrowable, prevResponse, prevSettings);
74+
}
75+
76+
@Override
77+
public boolean shouldRetry(Throwable prevThrowable, R prevResponse) throws CancellationException {
78+
return shouldRetryDecorator.shouldRetry(prevThrowable, prevResponse, delegate);
79+
}
80+
}
81+
}

modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java

+59-5
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111
import fixture.gcs.FakeOAuth2HttpHandler;
1212
import fixture.gcs.GoogleCloudStorageHttpHandler;
1313

14+
import com.google.api.client.http.HttpExecuteInterceptor;
15+
import com.google.api.client.http.HttpRequestInitializer;
1416
import com.google.api.gax.retrying.RetrySettings;
17+
import com.google.cloud.ServiceOptions;
1518
import com.google.cloud.http.HttpTransportOptions;
1619
import com.google.cloud.storage.StorageException;
1720
import com.google.cloud.storage.StorageOptions;
18-
import com.google.cloud.storage.StorageRetryStrategy;
1921
import com.sun.net.httpserver.HttpHandler;
2022

2123
import org.apache.http.HttpStatus;
@@ -61,6 +63,7 @@
6163
import java.util.Objects;
6264
import java.util.Optional;
6365
import java.util.Queue;
66+
import java.util.concurrent.ConcurrentHashMap;
6467
import java.util.concurrent.ConcurrentLinkedQueue;
6568
import java.util.concurrent.atomic.AtomicBoolean;
6669
import java.util.concurrent.atomic.AtomicInteger;
@@ -89,12 +92,19 @@
8992
@SuppressForbidden(reason = "use a http server")
9093
public class GoogleCloudStorageBlobContainerRetriesTests extends AbstractBlobContainerRetriesTestCase {
9194

95+
private final Map<String, AtomicInteger> requestCounters = new ConcurrentHashMap<>();
96+
private String endpointUrlOverride;
97+
9298
private String httpServerUrl() {
9399
assertThat(httpServer, notNullValue());
94100
InetSocketAddress address = httpServer.getAddress();
95101
return "http://" + InetAddresses.toUriString(address.getAddress()) + ":" + address.getPort();
96102
}
97103

104+
private String getEndpointUrl() {
105+
return endpointUrlOverride != null ? endpointUrlOverride : httpServerUrl();
106+
}
107+
98108
@Override
99109
protected String downloadStorageEndpoint(BlobContainer container, String blob) {
100110
return "/download/storage/v1/b/bucket/o/" + container.path().buildAsString() + blob;
@@ -120,7 +130,7 @@ protected BlobContainer createBlobContainer(
120130
) {
121131
final Settings.Builder clientSettings = Settings.builder();
122132
final String client = randomAlphaOfLength(5).toLowerCase(Locale.ROOT);
123-
clientSettings.put(ENDPOINT_SETTING.getConcreteSettingForNamespace(client).getKey(), httpServerUrl());
133+
clientSettings.put(ENDPOINT_SETTING.getConcreteSettingForNamespace(client).getKey(), getEndpointUrl());
124134
clientSettings.put(TOKEN_URI_SETTING.getConcreteSettingForNamespace(client).getKey(), httpServerUrl() + "/token");
125135
if (readTimeout != null) {
126136
clientSettings.put(READ_TIMEOUT_SETTING.getConcreteSettingForNamespace(client).getKey(), readTimeout);
@@ -136,8 +146,33 @@ StorageOptions createStorageOptions(
136146
final GoogleCloudStorageClientSettings gcsClientSettings,
137147
final HttpTransportOptions httpTransportOptions
138148
) {
139-
StorageOptions options = super.createStorageOptions(gcsClientSettings, httpTransportOptions);
140-
RetrySettings.Builder retrySettingsBuilder = RetrySettings.newBuilder()
149+
final HttpTransportOptions requestCountingHttpTransportOptions = new HttpTransportOptions(
150+
HttpTransportOptions.newBuilder()
151+
.setConnectTimeout(httpTransportOptions.getConnectTimeout())
152+
.setHttpTransportFactory(httpTransportOptions.getHttpTransportFactory())
153+
.setReadTimeout(httpTransportOptions.getReadTimeout())
154+
) {
155+
@Override
156+
public HttpRequestInitializer getHttpRequestInitializer(ServiceOptions<?, ?> serviceOptions) {
157+
// Add initializer/interceptor without interfering with any pre-existing ones
158+
HttpRequestInitializer httpRequestInitializer = super.getHttpRequestInitializer(serviceOptions);
159+
return request -> {
160+
if (httpRequestInitializer != null) {
161+
httpRequestInitializer.initialize(request);
162+
}
163+
HttpExecuteInterceptor interceptor = request.getInterceptor();
164+
request.setInterceptor(req -> {
165+
if (interceptor != null) {
166+
interceptor.intercept(req);
167+
}
168+
requestCounters.computeIfAbsent(request.getUrl().getRawPath(), (url) -> new AtomicInteger())
169+
.incrementAndGet();
170+
});
171+
};
172+
}
173+
};
174+
final StorageOptions options = super.createStorageOptions(gcsClientSettings, requestCountingHttpTransportOptions);
175+
final RetrySettings.Builder retrySettingsBuilder = RetrySettings.newBuilder()
141176
.setTotalTimeout(options.getRetrySettings().getTotalTimeout())
142177
.setInitialRetryDelay(Duration.ofMillis(10L))
143178
.setRetryDelayMultiplier(1.0d)
@@ -150,7 +185,7 @@ StorageOptions createStorageOptions(
150185
retrySettingsBuilder.setMaxAttempts(maxRetries + 1);
151186
}
152187
return options.toBuilder()
153-
.setStorageRetryStrategy(StorageRetryStrategy.getLegacyStorageRetryStrategy())
188+
.setStorageRetryStrategy(getRetryStrategy())
154189
.setHost(options.getHost())
155190
.setCredentials(options.getCredentials())
156191
.setRetrySettings(retrySettingsBuilder.build())
@@ -173,6 +208,25 @@ StorageOptions createStorageOptions(
173208
return new GoogleCloudStorageBlobContainer(randomBoolean() ? BlobPath.EMPTY : BlobPath.EMPTY.add("foo"), blobStore);
174209
}
175210

211+
public void testShouldRetryOnConnectionRefused() {
212+
// port 1 should never be open
213+
endpointUrlOverride = "http://127.0.0.1:1";
214+
executeListBlobsAndAssertRetries();
215+
}
216+
217+
public void testShouldRetryOnUnresolvableHost() {
218+
// https://www.rfc-editor.org/rfc/rfc2606.html#page-2
219+
endpointUrlOverride = "http://unresolvable.invalid";
220+
executeListBlobsAndAssertRetries();
221+
}
222+
223+
private void executeListBlobsAndAssertRetries() {
224+
final int maxRetries = randomIntBetween(3, 5);
225+
final BlobContainer blobContainer = createBlobContainer(maxRetries, null, null, null, null);
226+
expectThrows(StorageException.class, () -> blobContainer.listBlobs(randomPurpose()));
227+
assertEquals(maxRetries + 1, requestCounters.get("/storage/v1/b/bucket/o").get());
228+
}
229+
176230
public void testReadLargeBlobWithRetries() throws Exception {
177231
final int maxRetries = randomIntBetween(2, 10);
178232
final AtomicInteger countDown = new AtomicInteger(maxRetries);

0 commit comments

Comments
 (0)