#!/bin/bash
# Copyright (c) 2021-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# Test drivers and the number of unit tests each is expected to report.
# The *_UNIT_TEST_COUNT values are passed to check_test_results to verify
# that every expected test case actually ran.
SAGEMAKER_TEST=sagemaker_test.py
SAGEMAKER_MULTI_MODEL_TEST=sagemaker_multi_model_test.py
SAGEMAKER_GENERATE_TEST=sagemaker_generate_test.py
SAGEMAKER_GENERATE_STREAM_TEST=sagemaker_generate_stream_test.py
MULTI_MODEL_UNIT_TEST_COUNT=7
UNIT_TEST_COUNT=9
GENERATE_UNIT_TEST_COUNT=1
GENERATE_STREAM_UNIT_TEST_COUNT=1
CLIENT_LOG="./client.log"

DATADIR=/data/inferenceserver/${REPO_VERSION}
rm -r models/sm_model/2 && rm -r models/sm_model/3 && \
    sed -i "s/onnx_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt

# Stage the mock LLM model used by the generate/generate_stream endpoint tests.
mkdir -p models/mock_llm/1 && \
    cp ../python_models/generate_models/mock_llm/1/model.py models/mock_llm/1 && \
    cp ../python_models/generate_models/mock_llm/config.pbtxt models/mock_llm
# Use SageMaker's ping endpoint to check server status.
# Wait until the server health endpoint shows ready. Sets WAIT_RET to 0 on
# success, 1 on failure.
fi
set -e

# Shut down the server used by the previous test section before
# reconfiguring the endpoint type.
kill $SERVER_PID
wait $SERVE_PID

# Start server with the mock LLM and set the inference type to "generate"
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=mock_llm
export SAGEMAKER_TRITON_INFERENCE_TYPE=generate
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain the Triton PID this way because $! returns the serve script's PID.
# The bracketed pattern "[t]ritonserver" prevents grep from matching its own
# process entry in the ps output.
sleep 1
SERVER_PID=`ps | grep "[t]ritonserver" | awk '{ printf $1 }'`
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi
# Run the "generate" inference-type client test; failures are collected in
# RET rather than aborting, so the remaining sections still execute.
set +e
python $SAGEMAKER_GENERATE_TEST SageMakerGenerateTest >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $GENERATE_UNIT_TEST_COUNT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e
unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME
unset SAGEMAKER_TRITON_INFERENCE_TYPE

kill $SERVER_PID
wait $SERVE_PID

# Start server with the mock LLM and set the inference type to "generate_stream"
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=mock_llm
export SAGEMAKER_TRITON_INFERENCE_TYPE=generate_stream
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain the Triton PID this way because $! returns the serve script's PID.
# The bracketed pattern "[t]ritonserver" prevents grep from matching its own
# process entry in the ps output.
sleep 1
SERVER_PID=`ps | grep "[t]ritonserver" | awk '{ printf $1 }'`
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    exit 1
fi
# Helper library to parse SSE events
# https://github.com/mpetazzoni/sseclient
pip install sseclient-py

# Run the "generate_stream" inference-type client test; failures are
# collected in RET rather than aborting the script.
set +e
python $SAGEMAKER_GENERATE_STREAM_TEST SageMakerGenerateStreamTest >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $GENERATE_STREAM_UNIT_TEST_COUNT
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e
# Clear the streaming-test configuration and stop the server.
unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME
unset SAGEMAKER_TRITON_INFERENCE_TYPE

kill $SERVER_PID
wait $SERVE_PID
# Negative test: serve must refuse to start when the inference type is invalid.
export SAGEMAKER_TRITON_INFERENCE_TYPE=incorrect_inference_type
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain the Triton PID this way because $! returns the serve script's PID.
# The bracketed pattern "[t]ritonserver" prevents grep from matching its own
# process entry — a self-match here would make $SERVER_PID non-empty and
# falsely report that the server started.
sleep 1
SERVER_PID=`ps | grep "[t]ritonserver" | awk '{ printf $1 }'`
if [ -n "$SERVER_PID" ]; then
    echo -e "\n***\n*** Expect failed to start $SERVER\n***"
    kill $SERVER_PID || true
    cat $SERVER_LOG
    RET=1
else
    grep "ERROR: Invalid SAGEMAKER_TRITON_INFERENCE_TYPE" $SERVER_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed. Expected error on incorrect inference type\n***"
        RET=1
    fi
fi
unset SAGEMAKER_TRITON_INFERENCE_TYPE

unset SAGEMAKER_SAFE_PORT_RANGE
unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME
# Negative test: serve with a default model name that does not exist in the
# model repository.
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=incorrect_model_name
serve > $SERVER_LOG 2>&1 &
unset SAGEMAKER_TRITON_DEFAULT_MODEL_NAME

# Test serve with SAGEMAKER_TRITON_DEFAULT_MODEL_NAME unset, but containing
# a single model directory. Remove mock_llm so only one model remains.
rm -rf models/mock_llm
serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain the Triton PID this way because $! returns the serve script's PID.
0 commit comments