Skip to content

Commit 1911002

Browse files
[Backend]Add stable_diffusion and detection models support for KunlunXin XPU (PaddlePaddle#954)
* [FlyCV] Bump up FlyCV -> official release 1.0.0 * add valid_xpu for detection * add paddledetection model support for xpu * support all detection model in c++ and python * fix code * add python stable_diffusion support Co-authored-by: DefTruth <[email protected]> Co-authored-by: DefTruth <[email protected]>
1 parent 8a986c2 commit 1911002

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+857
-38
lines changed

examples/multimodal/stable_diffusion/README.md

100644100755
+5-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@ python infer.py --model_dir stable-diffusion-v1-4/ --scheduler "pndm" --backend
3737
如果使用stable-diffusion-v1-5模型,则可执行以下命令完成推理:
3838

3939
```
40+
# GPU上推理
4041
python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle
42+
43+
# 在昆仑芯XPU上推理
44+
python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle-xpu
4145
```
4246

4347
#### 参数说明
@@ -48,7 +52,7 @@ python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral"
4852
|----------|--------------|
4953
| --model_dir | 导出后模型的目录。 |
5054
| --model_format | 模型格式。默认为`'paddle'`,可选列表:`['paddle', 'onnx']`|
51-
| --backend | 推理引擎后端。默认为`paddle`,可选列表:`['onnx_runtime', 'paddle']`,当模型格式为`onnx`时,可选列表为`['onnx_runtime']`|
55+
| --backend | 推理引擎后端。默认为`paddle`,可选列表:`['onnx_runtime', 'paddle', 'paddle-xpu']`,当模型格式为`onnx`时,可选列表为`['onnx_runtime']`|
5256
| --scheduler | StableDiffusion 模型的scheduler。默认为`'pndm'`。可选列表:`['pndm', 'euler_ancestral']`,StableDiffusion模型对应的scheduler可参考[ppdiffuser模型列表](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/ppdiffusers/examples/textual_inversion)|
5357
| --unet_model_prefix | UNet模型前缀。默认为`unet`|
5458
| --vae_model_prefix | VAE模型前缀。默认为`vae_decoder`|

examples/multimodal/stable_diffusion/infer.py

100644100755
+33-4
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,7 @@ def parse_arguments():
6969
type=str,
7070
default='paddle',
7171
# Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
72-
choices=[
73-
'onnx_runtime',
74-
'paddle',
75-
],
72+
choices=['onnx_runtime', 'paddle', 'paddle-xpu'],
7673
help="The inference runtime backend of unet model and text encoder model."
7774
)
7875
parser.add_argument(
@@ -178,6 +175,24 @@ def create_trt_runtime(model_dir,
178175
return fd.Runtime(option)
179176

180177

178+
def create_xpu_runtime(model_dir, model_prefix, device_id=0):
179+
option = fd.RuntimeOption()
180+
option.use_xpu(
181+
device_id,
182+
l3_workspace_size=(64 * 1024 * 1024 - 4 * 1024),
183+
locked=False,
184+
autotune=False,
185+
autotune_file="",
186+
precision="int16",
187+
adaptive_seqlen=True,
188+
enable_multi_stream=True)
189+
option.use_paddle_lite_backend()
190+
model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
191+
params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
192+
option.set_model_path(model_file, params_file)
193+
return fd.Runtime(option)
194+
195+
181196
def get_scheduler(args):
182197
if args.scheduler == "pndm":
183198
scheduler = PNDMScheduler(
@@ -291,6 +306,20 @@ def get_scheduler(args):
291306
dynamic_shape=unet_dynamic_shape,
292307
device_id=args.device_id)
293308
print(f"Spend {time.time() - start : .2f} s to load unet model.")
309+
elif args.backend == "paddle-xpu":
310+
print("=== build text_encoder_runtime")
311+
text_encoder_runtime = create_xpu_runtime(
312+
args.model_dir,
313+
args.text_encoder_model_prefix,
314+
device_id=args.device_id)
315+
print("=== build vae_decoder_runtime")
316+
vae_decoder_runtime = create_xpu_runtime(
317+
args.model_dir, args.vae_model_prefix, device_id=args.device_id)
318+
print("=== build unet_runtime")
319+
start = time.time()
320+
unet_runtime = create_xpu_runtime(
321+
args.model_dir, args.unet_model_prefix, device_id=args.device_id)
322+
print(f"Spend {time.time() - start : .2f} s to load unet model.")
294323
pipe = StableDiffusionFastDeployPipeline(
295324
vae_decoder_runtime=vae_decoder_runtime,
296325
text_encoder_runtime=text_encoder_runtime,

examples/vision/detection/paddledetection/cpp/README.md

100644100755
+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ tar xvf ppyoloe_crn_l_300e_coco.tgz
3232
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
3333
# GPU上TensorRT推理
3434
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 2
35+
# 昆仑芯XPU推理
36+
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 3
3537
```
3638

3739
以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:

examples/vision/detection/paddledetection/cpp/infer_faster_rcnn.cc

100644100755
+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu(0, 0, false, false);
56+
auto model = fastdeploy::vision::detection::FasterRCNN(
57+
model_file, params_file, config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
82109
"e.g ./infer_model ./faster_rcnn_r50_vd_fpn_2x_coco ./test.jpeg 0"
83110
<< std::endl;
84111
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
85-
"with gpu."
112+
"with gpu; 2: run with xpu."
86113
<< std::endl;
87114
return -1;
88115
}
@@ -91,6 +118,8 @@ int main(int argc, char* argv[]) {
91118
CpuInfer(argv[1], argv[2]);
92119
} else if (std::atoi(argv[3]) == 1) {
93120
GpuInfer(argv[1], argv[2]);
121+
} else if (std::atoi(argv[3]) == 2) {
122+
XpuInfer(argv[1], argv[2]);
94123
}
95124
return 0;
96125
}

examples/vision/detection/paddledetection/cpp/infer_mask_rcnn.cc

100644100755
+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu(0, 0, false, false);
56+
auto model = fastdeploy::vision::detection::MaskRCNN(model_file, params_file,
57+
config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
82109
"e.g ./infer_model ./mask_rcnn_r50_1x_coco/ ./test.jpeg 0"
83110
<< std::endl;
84111
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
85-
"with gpu."
112+
"with gpu; 2: run with xpu."
86113
<< std::endl;
87114
return -1;
88115
}
@@ -92,6 +119,8 @@ int main(int argc, char* argv[]) {
92119
} else if (std::atoi(argv[3]) == 1) {
93120
GpuInfer(argv[1], argv[2]);
94121
} else if (std::atoi(argv[3]) == 2) {
122+
XpuInfer(argv[1], argv[2]);
123+
} else if (std::atoi(argv[3]) == 3) {
95124
std::cout
96125
<< "Backend::TRT has not been supported yet, will skip this inference."
97126
<< std::endl;

examples/vision/detection/paddledetection/cpp/infer_picodet.cc

100644100755
+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu();
56+
auto model = fastdeploy::vision::detection::PicoDet(model_file, params_file,
57+
config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -111,7 +138,7 @@ int main(int argc, char* argv[]) {
111138
"e.g ./infer_model ./picodet_model_dir ./test.jpeg 0"
112139
<< std::endl;
113140
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
114-
"with gpu; 2: run with gpu and use tensorrt backend."
141+
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
115142
<< std::endl;
116143
return -1;
117144
}
@@ -122,6 +149,8 @@ int main(int argc, char* argv[]) {
122149
GpuInfer(argv[1], argv[2]);
123150
} else if (std::atoi(argv[3]) == 2) {
124151
TrtInfer(argv[1], argv[2]);
152+
} else if (std::atoi(argv[3]) == 3) {
153+
XpuInfer(argv[1], argv[2]);
125154
}
126155
return 0;
127156
}

examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc

100644100755
+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu();
56+
auto model = fastdeploy::vision::detection::PPYOLO(model_file, params_file,
57+
config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
82109
"e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
83110
<< std::endl;
84111
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
85-
"with gpu."
112+
"with gpu; 2: run with xpu."
86113
<< std::endl;
87114
return -1;
88115
}
@@ -91,6 +118,8 @@ int main(int argc, char* argv[]) {
91118
CpuInfer(argv[1], argv[2]);
92119
} else if (std::atoi(argv[3]) == 1) {
93120
GpuInfer(argv[1], argv[2]);
121+
} else if (std::atoi(argv[3]) == 2) {
122+
XpuInfer(argv[1], argv[2]);
94123
}
95124
return 0;
96125
}

examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc

+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu();
56+
auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
57+
config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -111,7 +138,7 @@ int main(int argc, char* argv[]) {
111138
"e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0"
112139
<< std::endl;
113140
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
114-
"with gpu; 2: run with gpu and use tensorrt backend."
141+
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
115142
<< std::endl;
116143
return -1;
117144
}
@@ -122,6 +149,8 @@ int main(int argc, char* argv[]) {
122149
GpuInfer(argv[1], argv[2]);
123150
} else if (std::atoi(argv[3]) == 2) {
124151
TrtInfer(argv[1], argv[2]);
152+
} else if (std::atoi(argv[3]) == 3) {
153+
XpuInfer(argv[1], argv[2]);
125154
}
126155
return 0;
127156
}

0 commit comments

Comments
 (0)