cuda_OpticalFlowDual_TVL1 is not thread-safe in python #18155
Comments
Please try the latest release.
I have updated my OpenCV to 4.4.0, but unfortunately the problem still happens with the code below.

import cv2
from threading import Thread, Lock
import numpy as np

def job(video_path, lock: Lock):
    optical_flow = cv2.cuda_OpticalFlowDual_TVL1.create()
    video_capture = cv2.VideoCapture(video_path)
    _, prev_frame = video_capture.read()
    prev_frame = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    output = []
    for i in range(10):
        _, current_frame = video_capture.read()
        current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        # add thread lock: serialize all GPU work across threads
        lock.acquire()
        try:
            cuMat1 = cv2.cuda_GpuMat()
            cuMat2 = cv2.cuda_GpuMat()
            cuMat1.upload(prev_frame)
            cuMat2.upload(current_frame)
            cu_flow = optical_flow.calc(cuMat1, cuMat2, None)
            optical_flow_data = cu_flow.download()
        finally:
            lock.release()
        output.append(optical_flow_data)
        prev_frame = current_frame
    np.save('{}.npy'.format(video_path[:-4]), output)

if __name__ == '__main__':
    video_path_list = ['video1.avi',
                       'video2.avi',
                       'video3.avi']
    worker_list = []
    lock = Lock()
    for i in range(3):
        t = Thread(target=job, kwargs={'video_path': video_path_list[i], 'lock': lock})
        t.start()
        worker_list.append(t)
    for worker in worker_list:
        worker.join()

I have also updated the environment settings.
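If serializing every calc() call with a single global lock is too slow, another thing worth trying is to drop the lock and give each thread its own CUDA stream, following the pattern used in the test code posted further down in this thread. Below is a minimal sketch of that variant; job_with_stream and the '_stream.npy' file name are made up for illustration, and this is not a confirmed fix.

import cv2
import numpy as np

def job_with_stream(video_path):
    # Hypothetical variant of job(): no global lock; each thread keeps its own
    # TVL1 instance and reuses its own CUDA stream for every calc() call.
    optical_flow = cv2.cuda_OpticalFlowDual_TVL1.create()
    stream = cv2.cuda_Stream()
    video_capture = cv2.VideoCapture(video_path)
    _, prev_frame = video_capture.read()
    prev_frame = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    output = []
    for _ in range(10):
        _, current_frame = video_capture.read()
        current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        cuMat1 = cv2.cuda_GpuMat()
        cuMat2 = cv2.cuda_GpuMat()
        cuMat1.upload(prev_frame)
        cuMat2.upload(current_frame)
        cu_flow = optical_flow.calc(cuMat1, cuMat2, None, stream)
        output.append(cu_flow.download())
        prev_frame = current_frame
    np.save('{}_stream.npy'.format(video_path[:-4]), output)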
@nglee Do you have a chance to take a look at this?
@daniel-code Test Code

import cv2
from threading import Thread, Lock
import numpy as np

def job(video_path, output):
    optical_flow = cv2.cuda_OpticalFlowDual_TVL1.create()
    video_capture = cv2.VideoCapture(video_path)
    _, prev_frame = video_capture.read()
    prev_frame = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    for i in range(10):
        _, current_frame = video_capture.read()
        current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        cuMat1 = cv2.cuda_GpuMat()
        cuMat2 = cv2.cuda_GpuMat()
        cuMat1.upload(prev_frame)
        cuMat2.upload(current_frame)
        cu_flow = optical_flow.calc(cuMat1, cuMat2, None, cv2.cuda_Stream())
        optical_flow_data = cu_flow.download()
        output.append(optical_flow_data)
        prev_frame = current_frame

if __name__ == '__main__':
    video_path_list = ['E:/repos/opencv_extra/testdata/gpu/video/768x576.avi',
                       'E:/repos/opencv_extra/testdata/gpu/video/1920x1080.avi',
                       'E:/repos/opencv_extra/testdata/highgui/video/big_buck_bunny.mp4']

    # synchronous launch
    out0 = []
    out1 = []
    out2 = []
    job(video_path_list[0], out0)
    job(video_path_list[1], out1)
    job(video_path_list[2], out2)
    print('synchronous run complete')

    # asynchronous launch
    tout0 = []
    tout1 = []
    tout2 = []
    t1 = Thread(target=job, kwargs={'video_path': video_path_list[0], 'output': tout0})
    t1.start()
    t2 = Thread(target=job, kwargs={'video_path': video_path_list[1], 'output': tout1})
    t2.start()
    t3 = Thread(target=job, kwargs={'video_path': video_path_list[2], 'output': tout2})
    t3.start()
    t1.join()
    t2.join()
    t3.join()
    print('asynchronous run complete')

    # compare synchronous and asynchronous result
    print(np.array_equal(out0, tout0))
    print(np.array_equal(out1, tout1))
    print(np.array_equal(out2, tout2))

Test Code in C++
void helper(const string& path, vector<Mat>* out_vec)
{
    cv::VideoCapture capture(path);
    cv::Mat _prev;
    capture.read(_prev);
    cv::cuda::HostMem prev(_prev.size(), CV_8UC1);
    cv::cuda::HostMem cur(_prev.size(), CV_8UC1);
    cv::cvtColor(_prev, prev.createMatHeader(), cv::COLOR_BGR2GRAY);
    auto alg = cv::cuda::OpticalFlowDual_TVL1::create();
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_prev;
    d_prev.upload(prev, stream);
    for (int i = 0; i < 90; ++i)
    {
        cv::Mat _cur;
        capture.read(_cur);
        cv::cvtColor(_cur, cur.createMatHeader(), cv::COLOR_BGR2GRAY);
        cv::cuda::GpuMat d_cur;
        d_cur.upload(cur, stream);
        cv::cuda::GpuMat d_out;
        alg->calc(d_prev, d_cur, d_out, stream);
        cv::cuda::HostMem out;
        d_out.download(out, stream);
        stream.waitForCompletion();
        out_vec->push_back(out.createMatHeader().clone());
        d_prev = d_cur;
    }
}

TEST(OpticalFlowDual_TVL1_Issue, Issue18155)
{
    vector<string> video_path_list;
    video_path_list.emplace_back("E:/repos/opencv_extra/testdata/gpu/video/768x576.avi");
    video_path_list.emplace_back("E:/repos/opencv_extra/testdata/gpu/video/1920x1080.avi");
    video_path_list.emplace_back("E:/repos/opencv_extra/testdata/highgui/video/big_buck_bunny.mp4");

    // synchronous run
    vector<Mat> t1_sync;
    vector<Mat> t2_sync;
    vector<Mat> t3_sync;
    auto start = std::chrono::high_resolution_clock::now();
    {
        helper(video_path_list[0], &t1_sync);
        helper(video_path_list[1], &t2_sync);
        helper(video_path_list[2], &t3_sync);
    }
    auto end = std::chrono::high_resolution_clock::now();
    cout << "Synchronous run complete (" << std::to_string(std::chrono::duration<float, std::milli>(end - start).count()) << " ms)" << std::endl;

    // asynchronous run
    vector<Mat> t1_async;
    vector<Mat> t2_async;
    vector<Mat> t3_async;
    start = std::chrono::high_resolution_clock::now();
    {
        std::thread thread1(helper, video_path_list[0], &t1_async);
        std::thread thread2(helper, video_path_list[1], &t2_async);
        std::thread thread3(helper, video_path_list[2], &t3_async);
        thread1.join();
        thread2.join();
        thread3.join();
    }
    end = std::chrono::high_resolution_clock::now();
    cout << "All threads complete (Asynchronous run complete) (" << std::to_string(std::chrono::duration<float, std::milli>(end - start).count()) << " ms)" << std::endl;

    std::cout << std::to_string(t1_sync.size()) << std::endl;
    std::cout << std::to_string(t2_sync.size()) << std::endl;
    std::cout << std::to_string(t3_sync.size()) << std::endl;
    std::cout << std::to_string(t1_async.size()) << std::endl;
    std::cout << std::to_string(t2_async.size()) << std::endl;
    std::cout << std::to_string(t3_async.size()) << std::endl;

    for (int i = 0; i < t1_sync.size(); ++i)
        EXPECT_MAT_NEAR(t1_sync[i], t1_async[i], 0.0);
    for (int i = 0; i < t2_sync.size(); ++i)
        EXPECT_MAT_NEAR(t2_sync[i], t2_async[i], 0.0);
    for (int i = 0; i < t3_sync.size(); ++i)
        EXPECT_MAT_NEAR(t3_sync[i], t3_async[i], 0.0);
}
Thank you for your support.
It runs correctly in v4.4.0.
System information (version)
Detailed description
I ran the same code twice and found that cuda_OpticalFlowDual_TVL1 gave different results when calculating optical flow with Python multithreading.
The differences between the two optical flow results for the same video appear to be per-frame: some frames' results are consistent between runs and some are not.
This problem does not appear when using a single thread.
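As a concrete way to see the mismatch, the arrays saved by two separate runs of the script posted above in this thread can be compared directly. A minimal sketch, assuming the .npy output of each run was copied aside under hypothetical names such as 'video1_run1.npy' and 'video1_run2.npy':

import numpy as np

# Hypothetical file names: copies of the script's .npy output from two runs.
run1 = np.load('video1_run1.npy')
run2 = np.load('video1_run2.npy')

print(np.array_equal(run1, run2))  # False indicates non-deterministic results

# Per-frame comparison: the report suggests only some frames differ.
for i, (a, b) in enumerate(zip(run1, run2)):
    if not np.array_equal(a, b):
        print('frame {} differs, max abs diff = {}'.format(i, np.abs(a - b).max()))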
Steps to reproduce
Issue submission checklist
I have checked the problem with documentation, FAQ, open issues, answers.opencv.org, Stack Overflow, etc. and have not found a solution.