Skip to content

Commit 5ff5e98

Browse files
authored
[BFN] Update psu.py to process sigterm signal (sonic-net#13350)
Why I did it: Sometimes SIGTERM processing by psud takes more than the default 10 seconds (see stopwaitsecs in http://supervisord.org/configuration.html). Because of this, the following two test cases may fail: test_pmon_psud_stop_and_start_status and test_pmon_psud_term_and_start_status. How I did it: Updated the PSU plugin to process the SIGTERM signal so that psud finishes its last cycle in time. How to verify it: Run the SONiC CTs test_pmon_psud_stop_and_start_status and test_pmon_psud_term_and_start_status.
1 parent 8fdbf9d commit 5ff5e98

File tree

2 files changed

+36
-6
lines changed

2 files changed

+36
-6
lines changed

platform/barefoot/sonic-platform-modules-bfn-montara/sonic_platform/platform_utils.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
except ImportError as e:
1010
raise ImportError(str(e) + "- required module not found")
1111

12+
SIGTERM_CAUGHT = False
13+
1214
def file_create(path, mode=None):
1315
"""
1416
Ensure that file is created with the appropriate permissions
@@ -38,13 +40,16 @@ def wrapper(*args, **kwargs):
3840
def handler(sig, frame):
3941
if sigterm_handler:
4042
sigterm_handler(sig, frame)
43+
global SIGTERM_CAUGHT
44+
SIGTERM_CAUGHT = True
4145
raise Exception("Canceling {}() execution...".format(func.__name__))
4246

4347
sigterm_handler = signal.getsignal(signal.SIGTERM)
4448
signal.signal(signal.SIGTERM, handler)
4549
result = None
4650
try:
47-
result = func(*args, **kwargs)
51+
if not SIGTERM_CAUGHT:
52+
result = func(*args, **kwargs)
4853
finally:
4954
signal.signal(signal.SIGTERM, sigterm_handler)
5055
return result

platform/barefoot/sonic-platform-modules-bfn-montara/sonic_platform/psu.py

+30-5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ class Psu(PsuBase):
2828
__sensors_info = None
2929
__timestamp = 0
3030

31+
# When psud gets a termination signal it starts processing its last cycle.
32+
# This cycle must be as fast as possible so that psud can stop correctly;
33+
# otherwise it will be killed. The whole plugin must therefore be aware of
34+
# this signal and choose which operations to perform based on state, where
35+
# the state is either "termination signal" (received) or "no termination signal"
36+
37+
# State is "no termination signal"
3138
sigterm = False
3239
sigterm_default_handler = None
3340
cls_inited = False
@@ -54,12 +61,15 @@ def signal_handler(cls, sig, frame):
5461
if cls.sigterm_default_handler:
5562
cls.sigterm_default_handler(sig, frame)
5663
syslog.syslog(syslog.LOG_INFO, "Canceling PSU platform API calls...")
64+
# Changing state to "termination signal"
5765
cls.sigterm = True
5866

5967
@classmethod
6068
def __sensors_get(cls, cached=True):
6169
cls.__lock.acquire()
62-
if time.time() > cls.__timestamp + 15:
70+
# Operation may take a few seconds to process, so if state is
71+
# "termination signal", plugin doesn't perform this operation
72+
if time.time() > cls.__timestamp + 15 and not Psu.sigterm:
6373
# Update cache once per 15 seconds
6474
try:
6575
cls.__sensors_info = get_psu_metrics()
@@ -83,6 +93,8 @@ def __info_get(self):
8393
def psu_info_get(client):
8494
return client.pltfm_mgr.pltfm_mgr_pwr_supply_info_get(self.__index)
8595

96+
# Operation may take a few seconds to process, so if state is
97+
# "termination signal", plugin doesn't perform this operation
8698
# Update cache once per 2 seconds
8799
if self.__ts + 2 < time.time() and not Psu.sigterm:
88100
self.__info = None
@@ -96,6 +108,10 @@ def psu_info_get(client):
96108
return self.__info
97109
return self.__info
98110

111+
@cancel_on_sigterm
def get_metric_value(self, metric_name):
    """
    Looks up one sensor metric of this PSU in the class-level sensors data

    Args:
        metric_name: the metric key without the PSU prefix,
                     e.g. "12V Output Voltage_in1_input"
    Returns:
        Whatever the module-level get_metric_value() returns for the key
        "PSU<index> <metric_name>" in the data from Psu.__sensors_get()
    """
    # The prefix must be built with %-formatting: str.format() does not
    # interpret "%d", so "PSU%d ".format(index) would leave the literal
    # "PSU%d " in the key and the lookup would never match this PSU.
    metric_key = "PSU%d %s" % (self.__index, metric_name)
    return get_metric_value(Psu.__sensors_get(), metric_key)
114+
99115
@staticmethod
100116
def get_num_psus():
101117
"""
@@ -127,7 +143,7 @@ def get_voltage(self):
127143
A float number, the output voltage in volts,
128144
e.g. 12.1
129145
"""
130-
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Voltage_in1_input" % self.__index)
146+
return self.get_metric_value("12V Output Voltage_in1_input")
131147

132148
def get_current(self):
133149
"""
@@ -136,7 +152,7 @@ def get_current(self):
136152
Returns:
137153
A float number, the electric current in amperes, e.g 15.4
138154
"""
139-
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Current_curr2_input" % self.__index)
155+
return self.get_metric_value("12V Output Current_curr2_input")
140156

141157
def get_input_voltage(self):
142158
"""
@@ -145,15 +161,15 @@ def get_input_voltage(self):
145161
A float number, the input voltage in volts,
146162
e.g. 220
147163
"""
148-
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Voltage_in0_input" % self.__index)
164+
return self.get_metric_value("Input Voltage_in0_input")
149165

150166
def get_input_current(self):
151167
"""
152168
Retrieves the input current draw of the power supply
153169
Returns:
154170
A float number, the electric current in amperes, e.g 0.8
155171
"""
156-
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Current_curr1_input" % self.__index)
172+
return self.get_metric_value("Input Current_curr1_input")
157173

158174
def get_power(self):
159175
"""
@@ -177,6 +193,9 @@ def psu_present_get(client):
177193
return client.pltfm_mgr.pltfm_mgr_pwr_supply_present_get(self.__index)
178194

179195
status = False
196+
if Psu.sigterm:
197+
return status
198+
180199
try:
181200
status = thrift_try(psu_present_get, attempts=1)
182201
except Exception as e:
@@ -267,22 +286,28 @@ def get_position_in_parent(self):
267286
"""
268287
return self.__index
269288

289+
@cancel_on_sigterm
270290
def get_temperature(self):
271291
"""
272292
Retrieves current temperature reading from PSU
273293
Returns:
274294
A float number of current temperature in Celsius up to nearest thousandth
275295
of one degree Celsius, e.g. 30.125
276296
"""
297+
# Operation may take a few seconds to process, so if state is
298+
# "termination signal", plugin doesn't perform this operation
277299
return self.get_thermal(0).get_temperature()
278300

301+
@cancel_on_sigterm
279302
def get_temperature_high_threshold(self):
280303
"""
281304
Retrieves the high threshold temperature of PSU
282305
Returns:
283306
A float number, the high threshold temperature of PSU in Celsius
284307
up to nearest thousandth of one degree Celsius, e.g. 30.125
285308
"""
309+
# Operation may take a few seconds to process, so if state is
310+
# "termination signal", plugin doesn't perform this operation
286311
return self.get_thermal(0).get_high_threshold()
287312

288313
@property

0 commit comments

Comments
 (0)