Skip to content

Commit ddfc630

Browse files
authored
[CI]Add timeout exception prompt for auto_parallel ci (#72428)
* [CI]Add timeout exception prompt for auto_parallel ci
* fix codestyle
* add timeout for prepare_case
1 parent dd7fd37 commit ddfc630

File tree

1 file changed

+28
-5
lines changed

1 file changed

+28
-5
lines changed

tools/auto_parallel/ci_auto_parallel.sh

+28 -5
Original file line number | Diff line number | Diff line change
@@ -106,26 +106,49 @@ function execute_func_list(){
106106
let global_total_count++
107107
execute_num=1
108108
while true; do
109-
bash $1 exec_case $func_name $FLAGS_install_deps $FLAGS_download_data
109+
timeout 10m bash $1 exec_case $func_name $FLAGS_install_deps $FLAGS_download_data
110110
result=$?
111111
if [ $result -eq 0 ]; then
112112
echo -e "\033[32m test success!"
113113
let success_count++
114114
let global_success_count++
115+
elif [ $result -eq 1 ]; then
116+
if [ $execute_num -eq 1 ]; then
117+
echo -e "\033[31m first time execute failed, try again!"
118+
let execute_num++
119+
continue
120+
else
121+
echo -e "\033[31m second time execute failed, exit!"
122+
mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
123+
echo -e "\033[31m ${log_path}/$func_name_FAIL \033"
124+
tail -15 ${log_path}/${func_name}_FAIL.log
125+
let runtime_fail_count++
126+
global_runtime_fail_arr+=("$func_name")
127+
fi
115128
elif [ $result -eq 2 ]; then
116129
echo -e "\033[31m verification failed!"
117130
let verification_fail_count++
118131
global_verification_fail_arr+=("$func_name")
119132
elif [ $result -eq 250 ]; then
120133
if [ $execute_num -eq 1 ]; then
121-
echo -e "\033[31m fist time execute failed, try again!"
134+
echo -e "\033[31m first time execute failed, try again!"
122135
let execute_num++
123136
continue
124137
else
125138
echo -e "\033[31m second time execute failed, exit!"
139+
mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
140+
echo -e "\033[31m ${log_path}/$func_name_FAIL \033"
141+
tail -15 ${log_path}/${func_name}_FAIL.log
126142
let exit_250_count++
127143
global_exit_250_arr+=("$func_name")
128144
fi
145+
elif [ $result -eq 124 ]; then
146+
echo "\033[31m [failed-timeout] Test case execution was terminated after exceeding the 10m limit."
147+
mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
148+
echo -e "\033[31m ${log_path}/$func_name_FAIL \033"
149+
tail -15 ${log_path}/${func_name}_FAIL.log
150+
let runtime_fail_count++
151+
global_runtime_fail_arr+=("$func_name")
129152
else
130153
echo "test failed!"
131154
mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
@@ -210,7 +233,7 @@ if [[ ${#case_list[*]} -ne 0 ]];then
210233
let case_num++
211234
elif [[ ${case} == "llama_auto" ]];then
212235
cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh
213-
bash $cmd prepare_case llama_case_list_auto $FLAGS_install_deps $FLAGS_download_data
236+
timeout 5m bash $cmd prepare_case llama_case_list_auto $FLAGS_install_deps $FLAGS_download_data
214237
execute_func_list $cmd llama_auto
215238
# There is no need to reinstall the related packages of `PaddleNLP` afterward.
216239
export FLAGS_install_deps=1
@@ -221,15 +244,15 @@ if [[ ${#case_list[*]} -ne 0 ]];then
221244
clean_file /workspace/PaddleNLP/llm/auto_parallel/llama
222245
elif [[ ${case} == "gpt-3_auto" ]];then
223246
cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh
224-
bash $cmd prepare_case llm_gpt_case_list_auto $FLAGS_install_deps $FLAGS_download_data
247+
timeout 5m bash $cmd prepare_case llm_gpt_case_list_auto $FLAGS_install_deps $FLAGS_download_data
225248
execute_func_list $cmd gpt-3_auto
226249
# there is no need to repeat the `gpt` download process later.
227250
export FLAGS_download_data="gpt ""$FLAGS_download_data"
228251
let case_num++
229252
clean_file /workspace/PaddleNLP/llm/auto_parallel/gpt-3
230253
elif [[ ${case} == "gpt-3_dygraph" ]];then
231254
cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_dy.sh
232-
bash $cmd prepare_case llm_gpt_case_list_dygraph $FLAGS_install_deps $FLAGS_download_data
255+
timeout 5m bash $cmd prepare_case llm_gpt_case_list_dygraph $FLAGS_install_deps $FLAGS_download_data
233256
execute_func_list $cmd gpt-3_dygraph
234257
let case_num++
235258
clean_file /workspace/PaddleNLP/llm

0 commit comments

Comments
 (0)