Skip to content

Commit 1911002

Browse files
[Backend]Add stable_diffusion and detection models support for KunlunXin XPU (PaddlePaddle#954)
* [FlyCV] Bump up FlyCV -> official release 1.0.0 * add valid_xpu for detection * add paddledetection model support for xpu * support all detection model in c++ and python * fix code * add python stable_diffusion support Co-authored-by: DefTruth <[email protected]> Co-authored-by: DefTruth <[email protected]>
1 parent 8a986c2 commit 1911002

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+857
-38
lines changed

examples/multimodal/stable_diffusion/README.md

100644100755
+5-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@ python infer.py --model_dir stable-diffusion-v1-4/ --scheduler "pndm" --backend
3737
如果使用stable-diffusion-v1-5模型,则可执行以下命令完成推理:
3838

3939
```
40+
# GPU上推理
4041
python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle
42+
43+
# 在昆仑芯XPU上推理
44+
python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle-xpu
4145
```
4246

4347
#### 参数说明
@@ -48,7 +52,7 @@ python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral"
4852
|----------|--------------|
4953
| --model_dir | 导出后模型的目录。 |
5054
| --model_format | 模型格式。默认为`'paddle'`,可选列表:`['paddle', 'onnx']`|
51-
| --backend | 推理引擎后端。默认为`paddle`,可选列表:`['onnx_runtime', 'paddle']`,当模型格式为`onnx`时,可选列表为`['onnx_runtime']`|
55+
| --backend | 推理引擎后端。默认为`paddle`,可选列表:`['onnx_runtime', 'paddle', 'paddle-xpu']`,当模型格式为`onnx`时,可选列表为`['onnx_runtime']`|
5256
| --scheduler | StableDiffusion 模型的scheduler。默认为`'pndm'`。可选列表:`['pndm', 'euler_ancestral']`,StableDiffusion模型对应的scheduler可参考[ppdiffuser模型列表](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/ppdiffusers/examples/textual_inversion)|
5357
| --unet_model_prefix | UNet模型前缀。默认为`unet`|
5458
| --vae_model_prefix | VAE模型前缀。默认为`vae_decoder`|

examples/multimodal/stable_diffusion/infer.py

100644100755
+33-4
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,7 @@ def parse_arguments():
6969
type=str,
7070
default='paddle',
7171
# Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
72-
choices=[
73-
'onnx_runtime',
74-
'paddle',
75-
],
72+
choices=['onnx_runtime', 'paddle', 'paddle-xpu'],
7673
help="The inference runtime backend of unet model and text encoder model."
7774
)
7875
parser.add_argument(
@@ -178,6 +175,24 @@ def create_trt_runtime(model_dir,
178175
return fd.Runtime(option)
179176

180177

178+
def create_xpu_runtime(model_dir, model_prefix, device_id=0):
179+
option = fd.RuntimeOption()
180+
option.use_xpu(
181+
device_id,
182+
l3_workspace_size=(64 * 1024 * 1024 - 4 * 1024),
183+
locked=False,
184+
autotune=False,
185+
autotune_file="",
186+
precision="int16",
187+
adaptive_seqlen=True,
188+
enable_multi_stream=True)
189+
option.use_paddle_lite_backend()
190+
model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
191+
params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
192+
option.set_model_path(model_file, params_file)
193+
return fd.Runtime(option)
194+
195+
181196
def get_scheduler(args):
182197
if args.scheduler == "pndm":
183198
scheduler = PNDMScheduler(
@@ -291,6 +306,20 @@ def get_scheduler(args):
291306
dynamic_shape=unet_dynamic_shape,
292307
device_id=args.device_id)
293308
print(f"Spend {time.time() - start : .2f} s to load unet model.")
309+
elif args.backend == "paddle-xpu":
310+
print("=== build text_encoder_runtime")
311+
text_encoder_runtime = create_xpu_runtime(
312+
args.model_dir,
313+
args.text_encoder_model_prefix,
314+
device_id=args.device_id)
315+
print("=== build vae_decoder_runtime")
316+
vae_decoder_runtime = create_xpu_runtime(
317+
args.model_dir, args.vae_model_prefix, device_id=args.device_id)
318+
print("=== build unet_runtime")
319+
start = time.time()
320+
unet_runtime = create_xpu_runtime(
321+
args.model_dir, args.unet_model_prefix, device_id=args.device_id)
322+
print(f"Spend {time.time() - start : .2f} s to load unet model.")
294323
pipe = StableDiffusionFastDeployPipeline(
295324
vae_decoder_runtime=vae_decoder_runtime,
296325
text_encoder_runtime=text_encoder_runtime,

examples/vision/detection/paddledetection/cpp/README.md

100644100755
+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ tar xvf ppyoloe_crn_l_300e_coco.tgz
3232
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
3333
# GPU上TensorRT推理
3434
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 2
35+
# 昆仑芯XPU推理
36+
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 3
3537
```
3638

3739
以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:

examples/vision/detection/paddledetection/cpp/infer_faster_rcnn.cc

100644100755
+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu(0, 0, false, false);
56+
auto model = fastdeploy::vision::detection::FasterRCNN(
57+
model_file, params_file, config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
82109
"e.g ./infer_model ./faster_rcnn_r50_vd_fpn_2x_coco ./test.jpeg 0"
83110
<< std::endl;
84111
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
85-
"with gpu."
112+
"with gpu; 2: run with xpu."
86113
<< std::endl;
87114
return -1;
88115
}
@@ -91,6 +118,8 @@ int main(int argc, char* argv[]) {
91118
CpuInfer(argv[1], argv[2]);
92119
} else if (std::atoi(argv[3]) == 1) {
93120
GpuInfer(argv[1], argv[2]);
121+
} else if (std::atoi(argv[3]) == 2) {
122+
XpuInfer(argv[1], argv[2]);
94123
}
95124
return 0;
96125
}

examples/vision/detection/paddledetection/cpp/infer_mask_rcnn.cc

100644100755
+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu(0, 0, false, false);
56+
auto model = fastdeploy::vision::detection::MaskRCNN(model_file, params_file,
57+
config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
82109
"e.g ./infer_model ./mask_rcnn_r50_1x_coco/ ./test.jpeg 0"
83110
<< std::endl;
84111
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
85-
"with gpu."
112+
"with gpu; 2: run with xpu."
86113
<< std::endl;
87114
return -1;
88115
}
@@ -92,6 +119,8 @@ int main(int argc, char* argv[]) {
92119
} else if (std::atoi(argv[3]) == 1) {
93120
GpuInfer(argv[1], argv[2]);
94121
} else if (std::atoi(argv[3]) == 2) {
122+
XpuInfer(argv[1], argv[2]);
123+
} else if (std::atoi(argv[3]) == 3) {
95124
std::cout
96125
<< "Backend::TRT has not been supported yet, will skip this inference."
97126
<< std::endl;

examples/vision/detection/paddledetection/cpp/infer_picodet.cc

100644100755
+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu();
56+
auto model = fastdeploy::vision::detection::PicoDet(model_file, params_file,
57+
config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -111,7 +138,7 @@ int main(int argc, char* argv[]) {
111138
"e.g ./infer_model ./picodet_model_dir ./test.jpeg 0"
112139
<< std::endl;
113140
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
114-
"with gpu; 2: run with gpu and use tensorrt backend."
141+
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
115142
<< std::endl;
116143
return -1;
117144
}
@@ -122,6 +149,8 @@ int main(int argc, char* argv[]) {
122149
GpuInfer(argv[1], argv[2]);
123150
} else if (std::atoi(argv[3]) == 2) {
124151
TrtInfer(argv[1], argv[2]);
152+
} else if (std::atoi(argv[3]) == 3) {
153+
XpuInfer(argv[1], argv[2]);
125154
}
126155
return 0;
127156
}

examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc

100644100755
+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu();
56+
auto model = fastdeploy::vision::detection::PPYOLO(model_file, params_file,
57+
config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
82109
"e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
83110
<< std::endl;
84111
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
85-
"with gpu."
112+
"with gpu; 2: run with xpu."
86113
<< std::endl;
87114
return -1;
88115
}
@@ -91,6 +118,8 @@ int main(int argc, char* argv[]) {
91118
CpuInfer(argv[1], argv[2]);
92119
} else if (std::atoi(argv[3]) == 1) {
93120
GpuInfer(argv[1], argv[2]);
121+
} else if (std::atoi(argv[3]) == 2) {
122+
XpuInfer(argv[1], argv[2]);
94123
}
95124
return 0;
96125
}

examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc

+30-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
4747
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
4848
}
4949

50+
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
51+
auto model_file = model_dir + sep + "model.pdmodel";
52+
auto params_file = model_dir + sep + "model.pdiparams";
53+
auto config_file = model_dir + sep + "infer_cfg.yml";
54+
auto option = fastdeploy::RuntimeOption();
55+
option.UseXpu();
56+
auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
57+
config_file, option);
58+
if (!model.Initialized()) {
59+
std::cerr << "Failed to initialize." << std::endl;
60+
return;
61+
}
62+
63+
auto im = cv::imread(image_file);
64+
65+
fastdeploy::vision::DetectionResult res;
66+
if (!model.Predict(im, &res)) {
67+
std::cerr << "Failed to predict." << std::endl;
68+
return;
69+
}
70+
71+
std::cout << res.Str() << std::endl;
72+
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
73+
cv::imwrite("vis_result.jpg", vis_im);
74+
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
75+
}
76+
5077
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
5178
auto model_file = model_dir + sep + "model.pdmodel";
5279
auto params_file = model_dir + sep + "model.pdiparams";
@@ -111,7 +138,7 @@ int main(int argc, char* argv[]) {
111138
"e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0"
112139
<< std::endl;
113140
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
114-
"with gpu; 2: run with gpu and use tensorrt backend."
141+
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
115142
<< std::endl;
116143
return -1;
117144
}
@@ -122,6 +149,8 @@ int main(int argc, char* argv[]) {
122149
GpuInfer(argv[1], argv[2]);
123150
} else if (std::atoi(argv[3]) == 2) {
124151
TrtInfer(argv[1], argv[2]);
152+
} else if (std::atoi(argv[3]) == 3) {
153+
XpuInfer(argv[1], argv[2]);
125154
}
126155
return 0;
127156
}

0 commit comments

Comments
 (0)