[CI] Add timeout exception prompt for auto_parallel CI (#72428)

Liujie0926 · web-flow · commit ddfc63074451 · 2025-04-28T14:13:10.000+08:00
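* [CI] Add timeout exception prompt for auto_parallel CI (each exec_case now runs under `timeout 10m`; exit code 124 is reported as a [failed-timeout] runtime failure)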

* Fix code style

* Add a 5m timeout for prepare_case
diff --git a/tools/auto_parallel/ci_auto_parallel.sh b/tools/auto_parallel/ci_auto_parallel.sh
@@ -106,26 +106,49 @@ function execute_func_list(){
         let global_total_count++
         execute_num=1
         while true; do
-            bash $1 exec_case $func_name $FLAGS_install_deps $FLAGS_download_data
+            timeout 10m bash $1 exec_case $func_name $FLAGS_install_deps $FLAGS_download_data
             result=$?
             if [ $result -eq 0 ]; then
                 echo -e "\033[32m test success!"
                 let success_count++
                 let global_success_count++
+            elif [ $result -eq 1 ]; then
+                if [ $execute_num -eq 1 ]; then
+                    echo -e "\033[31m first time execute failed, try again!"
+                    let execute_num++
+                    continue
+                else
+                    echo -e "\033[31m second time execute failed, exit!"
+                    mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
+                    echo -e "\033[31m ${log_path}/$func_name_FAIL \033"
+                    tail -15 ${log_path}/${func_name}_FAIL.log
+                    let runtime_fail_count++
+                    global_runtime_fail_arr+=("$func_name")
+                fi
             elif [ $result -eq 2 ]; then
                 echo -e "\033[31m verification failed!"
                 let verification_fail_count++
                 global_verification_fail_arr+=("$func_name")
             elif [ $result -eq 250 ]; then
                 if [ $execute_num -eq 1 ]; then
-                    echo -e "\033[31m fist time execute failed, try again!"
+                    echo -e "\033[31m first time execute failed, try again!"
                     let execute_num++
                     continue
                 else
                     echo -e "\033[31m second time execute failed, exit!"
+                    mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
+                    echo -e "\033[31m ${log_path}/$func_name_FAIL \033"
+                    tail -15 ${log_path}/${func_name}_FAIL.log
                     let exit_250_count++
                     global_exit_250_arr+=("$func_name")
                 fi
+            elif [ $result -eq 124 ]; then
+                echo "\033[31m [failed-timeout] Test case execution was terminated after exceeding the 10m limit."
+                mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
+                echo -e "\033[31m ${log_path}/$func_name_FAIL \033"
+                tail -15 ${log_path}/${func_name}_FAIL.log
+                let runtime_fail_count++
+                global_runtime_fail_arr+=("$func_name")
             else
                 echo "test failed!"
                 mv ${log_path}/$func_name ${log_path}/${func_name}_FAIL.log
@@ -210,7 +233,7 @@ if [[ ${#case_list[*]} -ne 0 ]];then
             let case_num++
         elif [[ ${case} == "llama_auto" ]];then
             cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh
-            bash $cmd prepare_case llama_case_list_auto $FLAGS_install_deps $FLAGS_download_data
+            timeout 5m bash $cmd prepare_case llama_case_list_auto $FLAGS_install_deps $FLAGS_download_data
             execute_func_list $cmd llama_auto
             # There is no need to reinstall the related packages of `PaddleNLP` afterward.
             export FLAGS_install_deps=1
@@ -221,15 +244,15 @@ if [[ ${#case_list[*]} -ne 0 ]];then
             clean_file /workspace/PaddleNLP/llm/auto_parallel/llama
         elif [[ ${case} == "gpt-3_auto" ]];then
             cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh
-            bash $cmd prepare_case llm_gpt_case_list_auto $FLAGS_install_deps $FLAGS_download_data
+            timeout 5m bash $cmd prepare_case llm_gpt_case_list_auto $FLAGS_install_deps $FLAGS_download_data
             execute_func_list $cmd gpt-3_auto
             # there is no need to repeat the `gpt` download process later.
             export FLAGS_download_data="gpt ""$FLAGS_download_data"
             let case_num++
             clean_file /workspace/PaddleNLP/llm/auto_parallel/gpt-3
         elif [[ ${case} == "gpt-3_dygraph" ]];then
             cmd=/workspace/PaddleNLP/scripts/distribute/ci_case_dy.sh
-            bash $cmd prepare_case llm_gpt_case_list_dygraph $FLAGS_install_deps $FLAGS_download_data
+            timeout 5m bash $cmd prepare_case llm_gpt_case_list_dygraph $FLAGS_install_deps $FLAGS_download_data
             execute_func_list $cmd gpt-3_dygraph
             let case_num++
             clean_file /workspace/PaddleNLP/llm