Skip to content

Commit 20c2c66

Browse files
esevan authored and lresende committed
Attempt to re-establish websocket connection to Gateway
When notebook (started with the `--gateway-url` option) lost its connection to Gateway, it did not connect to Gateway again even though the websocket connection from the client was still alive. This change re-establishes the connection to Gateway to prevent this anomaly. Signed-off-by: Eunsoo Park <[email protected]>
1 parent 5c43809 commit 20c2c66

File tree

1 file changed

+18
-8
lines changed

1 file changed

+18
-8
lines changed

notebook/gateway/handlers.py

+18-8
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,12 @@ def __init__(self, **kwargs):
130130
self.kernel_id = None
131131
self.ws = None
132132
self.ws_future = Future()
133-
self.ws_future_cancelled = False
133+
self.disconnected = False
134134

135135
@gen.coroutine
136136
def _connect(self, kernel_id):
137+
# websocket is initialized before connection
138+
self.ws = None
137139
self.kernel_id = kernel_id
138140
ws_url = url_path_join(
139141
GatewayClient.instance().ws_url,
@@ -148,40 +150,48 @@ def _connect(self, kernel_id):
148150
self.ws_future.add_done_callback(self._connection_done)
149151

150152
def _connection_done(self, fut):
151-
if not self.ws_future_cancelled: # prevent concurrent.futures._base.CancelledError
153+
if not self.disconnected and fut.exception() is None: # prevent concurrent.futures._base.CancelledError
152154
self.ws = fut.result()
153155
self.log.debug("Connection is ready: ws: {}".format(self.ws))
154156
else:
155-
self.log.warning("Websocket connection has been cancelled via client disconnect before its establishment. "
157+
self.log.warning("Websocket connection has been closed via client disconnect or due to error. "
156158
"Kernel with ID '{}' may not be terminated on GatewayClient: {}".
157159
format(self.kernel_id, GatewayClient.instance().url))
158160

159161
def _disconnect(self):
162+
self.disconnected = True
160163
if self.ws is not None:
161164
# Close connection
162165
self.ws.close()
163166
elif not self.ws_future.done():
164167
# Cancel pending connection. Since future.cancel() is a noop on tornado, we'll track cancellation locally
165168
self.ws_future.cancel()
166-
self.ws_future_cancelled = True
167-
self.log.debug("_disconnect: ws_future_cancelled: {}".format(self.ws_future_cancelled))
169+
self.log.debug("_disconnect: future cancelled, disconnected: {}".format(self.disconnected))
168170

169171
@gen.coroutine
170172
def _read_messages(self, callback):
171173
"""Read messages from gateway server."""
172-
while True:
174+
while self.ws is not None:
173175
message = None
174-
if not self.ws_future_cancelled:
176+
if not self.disconnected:
175177
try:
176178
message = yield self.ws.read_message()
177179
except Exception as e:
178180
self.log.error("Exception reading message from websocket: {}".format(e)) # , exc_info=True)
179181
if message is None:
182+
if not self.disconnected:
183+
self.log.warning("Lost connection to Gateway: {}".format(self.kernel_id))
180184
break
181185
callback(message) # pass back to notebook client (see self.on_open and WebSocketChannelsHandler.open)
182186
else: # ws cancelled - stop reading
183187
break
184188

189+
if not self.disconnected: # if websocket is not disconnected by client, attempt to reconnect to Gateway
190+
self.log.info("Attempting to re-establish the connection to Gateway: {}".format(self.kernel_id))
191+
self._connect(self.kernel_id)
192+
loop = IOLoop.current()
193+
loop.add_future(self.ws_future, lambda future: self._read_messages(callback))
194+
185195
def on_open(self, kernel_id, message_callback, **kwargs):
186196
"""Web socket connection open against gateway server."""
187197
self._connect(kernel_id)
@@ -205,7 +215,7 @@ def on_message(self, message):
205215
def _write_message(self, message):
206216
"""Send message to gateway server."""
207217
try:
208-
if not self.ws_future_cancelled:
218+
if not self.disconnected and self.ws is not None:
209219
self.ws.write_message(message)
210220
except Exception as e:
211221
self.log.error("Exception writing message to websocket: {}".format(e)) # , exc_info=True)

0 commit comments

Comments
 (0)