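"""Particle-filter tracking of a cuttlefish in a video, initialised from a YOLOv7 detection.

Example invocation (a sketch; the video path below is illustrative, not part of the repo):

    python tracking.py --filepath videos/sequence1.mp4 --particle ppp2Dbb \
        --descriptor hogcascadelbp --similarity cos --N 500 --save-video
"""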
import argparse
import os
from typing import Tuple

import cv2
import numpy as np

from particle_filter import ParticleFilter, particle_dict, resample_dict
from utils import descriptor_dict, similarity_dict, slicer_dict
from detect_init import Model


def get_opts():
    parser = argparse.ArgumentParser()
    # File args
    parser.add_argument('--filepath', type=str, required=True, help='filepath of the video')
    parser.add_argument('--scale-factor', type=float, default=1.,
                        help='Scale used to resize the frames of the video')
    # Particle Filter args
    parser.add_argument('--N', type=int, default=500,
                        help='number of particles')
    parser.add_argument('--particle', type=str, default='cap2Dbb',
                        choices=['cap2Dfbb', 'cap2Dbb', 'ppp2Dbb'],
                        help='which particle structure to use')
    parser.add_argument('--descriptor', type=str, default='hog',
                        choices=['hog', 'hogcolor', 'hogcascade', 'hogcascadelbp', 'lbp'],
                        help='which descriptor to use')
    parser.add_argument('--similarity', type=str, default='bds',
                        choices=['bds', 'bdl', 'cos', 'dkl'],
                        help='which similarity measure to use')
    parser.add_argument('--resampling', type=str, default='systematic',
                        choices=['systematic', 'residual', 'stratified', 'multinomial'],
                        help='which resampling method to use')
    parser.add_argument('--slicer', type=str, default='resize',
                        choices=['resize', 'crop'],
                        help='which slicer to use')
    parser.add_argument('--alpha', type=float, default=1.2, help='Scaling factor for the search area')
    parser.add_argument('--resample-factor', type=float, default=1./4., help='Factor used to compute the resampling threshold')
    # Descriptor args
    parser.add_argument('--nb-features', type=int, default=4000, help='Max number of features for keypoint descriptors')
    parser.add_argument('--desc-size', nargs=2, type=int, default=None, help='Fixed size used to force all patches to have the same size in every frame')
    parser.add_argument('--lbp-radius', type=int, default=1, help='Radius of the LBP')
    parser.add_argument('--lbp-nbpoints', type=int, default=8, help='Number of points used for the LBP')
    # Model args
    parser.add_argument('--weights', nargs='+', type=str, default='weights/cuttlefish_best.pt', help='model.pt path(s)')
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # Video args
    parser.add_argument('--save-video', action='store_true', help='Save a video')
    parser.add_argument('--save-path', type=str, default='results', help='Path to save the video')
    # Seed for random generator
    parser.add_argument('--seed', type=int, default=None, help='Seed for the random generator')
    return parser.parse_args()
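
# Assumed particle state layout, inferred from init_pos below and from the indices used by the
# drawing helpers (it is not stated explicitly in the original code):
#   [x, vx, ax, y, vy, ay, half_width, half_height]
# i.e. p[0]/p[3] give the bounding-box centre and p[6]/p[7] its half extents.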
# Draw Bbox
def draw_output_frame(frame: np.ndarray, estimate_particle: np.ndarray, color=(0, 255, 0)):
    output_frame = frame
    for p in estimate_particle:
        output_frame = cv2.rectangle(
            output_frame,
            (int(p[0] - p[6]), int(p[3] - p[7])),
            (int(p[0] + p[6]), int(p[3] + p[7])),
            color,
            thickness=2)
    return output_frame

# Draw each particle
def draw_output_particles(frame: np.ndarray, particles: np.ndarray, color=(0, 0, 255)):
    output_frame = frame
    for particle in particles[:, 0]:
        output_frame = cv2.circle(output_frame, (int(particle[0]), int(particle[3])), radius=1, color=color, thickness=1)
    return output_frame

# Draw the mean particle
def draw_output_mean_particule(frame: np.ndarray, mean_particles: np.ndarray, color=(255, 0, 0)):
    output_frame = frame
    for p in mean_particles:
        output_frame = cv2.circle(output_frame, (int(p[0]), int(p[3])), radius=3, color=color, thickness=3)
    return output_frame

# Draw the search area
def draw_search_area(frame: np.ndarray, mean_particles: np.ndarray, search_area: np.ndarray, color=(0, 0, 255)):
    output_frame = frame
    for i, p in enumerate(mean_particles):
        output_frame = cv2.rectangle(
            output_frame,
            (int(p[0] - search_area[i, 0]), int(p[3] - search_area[i, 1])),
            (int(p[0] + search_area[i, 0]), int(p[3] + search_area[i, 1])),
            color, thickness=2)
    return output_frame

# Display the frame's number
def draw_frame_number(frame: np.ndarray, nbframe: int, color=(200, 200, 200)):
    output_frame = cv2.putText(frame, f'{nbframe}', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)
    return output_frame
# Select a cuttlefish to track based on confidence and distance from the center of the image
def cuttlefish_picker_WSE(init_frame, conf, cuttlefish):
    error = (1 - conf) * np.sqrt(((init_frame.shape[1] // 2) - cuttlefish[:, 0])**2 + ((init_frame.shape[0] // 2) - cuttlefish[:, 1])**2)
    index = np.argmin(error)
    return (conf[index], cuttlefish[index])

# Select a random cuttlefish to track
def cuttlefish_picker_random(init_frame, conf, cuttlefish):
    index = np.random.default_rng().integers(low=0, high=conf.shape[0])
    return (conf[index], cuttlefish[index])
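
# Small illustrative example of the WSE picker (the values are made up, not from the repo):
# on a 100x100 frame, a detection at the centre with conf 0.9 has weighted error 0, while one
# in a corner with conf 0.6 has error ~22.6, so the centre detection is returned.
#   frame = np.zeros((100, 100, 3), dtype=np.uint8)
#   conf = np.array([0.9, 0.6])
#   boxes = np.array([[50., 50., 10., 10.], [10., 10., 10., 10.]])
#   cuttlefish_picker_WSE(frame, conf, boxes)   # -> (0.9, array([50., 50., 10., 10.]))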
if __name__ == "__main__":
    # get params
    args = get_opts()
    # init params
    N = abs(args.N)
    seed = args.seed
    nbframe = 0
    stop = False
    outArray = []
    # init Model
    model = Model(
        weights=args.weights, device=args.device, img_size=args.img_size,
        conf_thres=args.conf_thres, iou_thres=args.iou_thres)
    # Load video
    cap = cv2.VideoCapture(args.filepath)
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Select a cuttlefish to track
    conf = []
    while not len(conf):
        # Read first frame
        ret, current_frame = cap.read()
        nbframe += 1
        if not ret:
            print("Error: couldn't read frame")
            quit()
        # Resize if a scale factor is given
        if args.scale_factor != 1.:
            current_frame = cv2.resize(current_frame, (int(current_frame.shape[1] * args.scale_factor), int(current_frame.shape[0] * args.scale_factor)), interpolation=cv2.INTER_AREA)
        # Save image dimensions
        img_size = (current_frame.shape[1], current_frame.shape[0])
        # Run YOLOv7 to get bounding boxes
        conf, cuttlefish = model.detect(current_frame)
    if len(conf):
        tracked_conf, tracked_cuttlefish = cuttlefish_picker_WSE(current_frame, conf, cuttlefish)
        # tracked_conf, tracked_cuttlefish = cuttlefish_picker_random(current_frame, conf, cuttlefish)
    # _, init_frame = cap.read()
    # tracked_conf, tracked_cuttlefish = (0.73389, np.array([824.5, 798.5, 203/2, 151/2]))
    # Selected cuttlefish: convert width/height to half-width/half-height
    tracked_cuttlefish[2:] /= 2
    print(tracked_conf, tracked_cuttlefish)
    # Create video output under --save-path
    if args.save_video:
        os.makedirs(args.save_path, exist_ok=True)
        outvid = cv2.VideoWriter(os.path.join(args.save_path, 'output.avi'), cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, img_size)
    # Create initial position: first row is the initial state mean, second row presumably the
    # per-dimension spread used to draw the initial particles
    ratio = img_size[0] / img_size[1]
    init_pos = np.array([[
        [tracked_cuttlefish[0], 0, 0, tracked_cuttlefish[1], 0, 0, tracked_cuttlefish[2], tracked_cuttlefish[3]],
        [(1 - tracked_conf) * 50 * ratio, 0.1 * ratio, 0.1 * ratio, (1 - tracked_conf) * 50, 0.1, 0.1, (1 - tracked_conf), (1 - tracked_conf)]
    ]])
    # Create covariance matrices for the prediction and update models
    # (the commented values below are previously tried configurations, kept for reference)
    # Sequence 1
    # Q_motion = np.array([[5*ratio, 0.1*fps*ratio, 0.2*fps*ratio, 5, 0.1*fps, 0.2*fps, 0.2*ratio, 0.2]]) # cap2Dbb lbp dbl MLE
    # Sequence 2
    # Q_motion = np.array([[5*ratio, 0.1*fps*ratio, 0.2*fps*ratio, 5, 0.1*fps, 0.2*fps, 0.2*ratio, 0.2]]) # ppp2Dbb lbp dbs MLE
    # Q_motion = np.array([[10*ratio, 0.1*fps*ratio, 0.4*fps*ratio, 10, 0.1*fps, 0.4*fps, 0.1*ratio, 0.1]]) # cap2Dbb hog dbs MLE
    # Q_motion = np.array([[5*ratio, 0, 0, 5, 0, 0, 0.1*ratio, 0.1]]) # ppp2Dbb hogcascade dbs MLE R0.05 128 64
    # Q_motion = np.array([[5*ratio, 0, 0, 5, 0, 0, 0.1*ratio, 0.1]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 128 64
    # Q_motion = np.array([[8*ratio, 0, 0, 8, 0, 0, 0.4, 0.4]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np100 (gamma 0.0125 0.025) nbp16 r2
    # Q_motion = np.array([[5, 0, 0, 5, 0, 0, 0.8, 0.8]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np100 (gamma 0.0125 0.025) nbp16 r2
    # Q_motion = np.array([[2, 0, 0, 4, 0, 0, 0.4, 0.4]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np100 (gamma 0.0125 0.025) nbp16 r2
    # Q_motion = np.array([[0.5, 0, 0, 0.5, 0, 0, 0.2, 0.2]]) # ppp2Dbb hogcascadelbp cos MLE R0.1 64 64 np100 (gamma 0.0125 0.025) nbp16 r2
    # Q_motion = np.array([[0.1, 0, 0, 0.1, 0, 0, 0.1, 0.1]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np100 (gamma 0.0125 0.025) nbp16 r2
    # Q_motion = np.array([[4, 0, 0, 8, 0, 0, 0.4, 0.8]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np500 (gamma 0.0125 0.025) nbp16 r2
    # Q_motion = np.array([[8*ratio, 0, 0, 8, 0, 0, 0.8, 1.0]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np100 (gamma 0.0125 0.025) nbp12 r2
    # Q_motion = np.array([[2, 0, 0, 2, 0, 0, 1, 1]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 128 128 np1000 (gamma 0.0125 0.025)
    # Sequence 4
    # Q_motion = np.array([[20*ratio, 10*fps*ratio, 20*fps*ratio, 20, 10*fps, 20*fps, 0.8*ratio, 0.8]]) # cap2Dbb hog bdl MAP
    # Q_motion = np.array([[1*ratio, 0.25*fps*ratio, 0.5*fps*ratio, 1, 0.25*fps, 0.25*fps, 0*ratio, 0]]) # cap2Dfbb hog bds MLE
    # Q_motion = np.array([[1*ratio, 0.5*fps*ratio, 1*fps*ratio, 1, 0.5*fps, 1*fps, 0.2*ratio, 0.2]]) # cap2Dbb hog bds MLE
    # Q_motion = np.array([[5*ratio, 0.4*fps*ratio, 0.8*fps*ratio, 5, 0.4*fps, 0.8*fps, 0.1*ratio, 0.1]]) # cap2Dbb hog bds MLE
    # Q_motion = np.array([[0.1*ratio, 0*fps*ratio, 0*fps*ratio, 0.1, 0*fps, 0*fps, 0.2*ratio, 0.2]]) # ppp2Dbb hog bds MLE
    # Q_motion = np.array([[1*ratio, 0*fps*ratio, 0*fps*ratio, 1, 0*fps, 0*fps, 1*ratio, 1]]) # ppp2Dbb hog bds MLE gaussianblur
    # Q_motion = np.array([[5*ratio, 0, 0, 5, 0, 0, 0.1*ratio, 0.1]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 128 np100 (gamma 0.0125 0.025)
    # Q_motion = np.array([[8*ratio, 0, 0, 8, 0, 0, 0.2, 0.4]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 128 np100 (gamma 0.0125 0.025)
    # Q_motion = np.array([[5*ratio, 0, 0, 5, 0, 0, 0.1*ratio, 0.1]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 128 np100
    # Q_motion = np.array([[10*ratio, 0, 0, 10, 0, 0, 0.2*ratio, 0.2]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np100 (gamma 0.0125 0.025)
    # Q_motion = np.array([[5*ratio, 0, 0, 5, 0, 0, 0.5*ratio, 0.5]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np500 (gamma 0.0125 0.025)
    # Q_motion = np.array([[5*ratio, 0, 0, 5, 0, 0, 0.6, 0.9]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np500 (gamma 0.0125 0.025)
    # Q_motion = np.array([[5*ratio, 0, 0, 5, 0, 0, 0.6, 0.9]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np500 (gamma 0.025 0.05)
    # Q_motion = np.array([[5*ratio, 0, 0, 5, 0, 0, 0.7, 1.0]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np500 (gamma 0.025 0.05)
    # Q_motion = np.array([[5, 0, 0, 5, 0, 0, 0.05, 0.05]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np100 (gamma 0.025 0.05)
    # Q_motion = np.array([[5, 0, 0, 5, 0, 0, 1, 1]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np500 (gamma 0.025 0.05)
    # Q_motion = np.array([[2*ratio, 0, 0, 2, 0, 0, 0.2, 0.2]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np100 (gamma 0.0125 0.025) 2*w h/2
    # Q_motion = np.array([[4, 0, 0, 2, 0, 0, 0.4, 0.1]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np1000 (gamma 0.0125 0.025)
    # Q_motion = np.array([[2*ratio, 0, 0, 2, 0, 0, 0.2, 0.2]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np1000 (gamma 0.0125 0.025)
    # Q_motion = np.array([[2, 0, 0, 2, 0, 0, 1, 1]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np1000 (gamma 0.0125 0.025)
    Q_motion = np.array([[2*ratio, 0, 0, 2, 0, 0, 0.2, 0.2]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 128 128 np1000 (gamma 0.0125 0.025)
    # Q_motion = np.array([[2, 0, 0, 2, 0, 0, 0.2, 0.2]]) # ppp2Dbb hogcascadelbp cos MLE R0.05 64 64 np1000 (gamma 0.025 0.05)
    # R = np.array([[0.1]])
    # R = np.array([[0.05]])
    R = np.array([[0.05, 0.2]])
    # R = np.array([[25]])
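
    # Q_motion is presumably the per-dimension process-noise scale of the motion model, and R
    # the measurement-noise parameter(s) of the observation likelihood; this reading is an
    # assumption based on the variable names and on how ParticleFilter is constructed below.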
    # Set size of the descriptor and slicer
    desc_size = args.desc_size if args.desc_size is not None else (2 * int(tracked_cuttlefish[2]), 2 * int(tracked_cuttlefish[3]))
    # init functions and classes
    if args.descriptor in ('hog', 'hogcolor', 'hogcascade'):
        descriptor = descriptor_dict[args.descriptor](desc_size, freezeSize=(args.desc_size is not None))
    elif args.descriptor == 'lbp':
        descriptor = descriptor_dict[args.descriptor](args.lbp_nbpoints, args.lbp_radius)
    elif args.descriptor == 'hogcascadelbp':
        descriptor = descriptor_dict[args.descriptor](desc_size, args.lbp_nbpoints, args.lbp_radius)
    else:
        descriptor = descriptor_dict[args.descriptor](args.nb_features)
    similarity = similarity_dict[args.similarity]
    resampling = resample_dict[args.resampling]
    slicer = slicer_dict[args.slicer](desc_size, np.copy(current_frame), freezeSize=(args.desc_size is not None))
    particle_struct = particle_dict[args.particle]
    # Initialize the particle filter
    particle_filter = ParticleFilter(
        N, particle_struct, 1,
        init_pos, np.copy(current_frame),
        args.alpha, Q_motion, R,
        slicer, descriptor, similarity, resampling,
        seed)
    # Show initial particles
    output_frame = draw_output_frame(np.copy(current_frame), particle_filter.mu)
    output_frame = draw_output_particles(output_frame, particle_filter.particles)
    output_frame = draw_output_mean_particule(output_frame, particle_filter.mu)
    output_frame = draw_frame_number(output_frame, nbframe)
    # output_frame = draw_search_area(output_frame, particle_filter.mu, particle_filter.search_area)
    cv2.imshow('Track Cuttlefish', output_frame)
    # Write the frame into the video file
    if args.save_video:
        outvid.write(output_frame)
    outArray.append(particle_filter.mu[0])
    cv2.waitKey(0)
    # Processing loop
    while not stop:
        # Read a frame
        ret, current_frame = cap.read()
        nbframe += 1
        if not ret:
            print("Error: couldn't read frame")
            break
        # Resize if a scale factor is given
        if args.scale_factor != 1.:
            current_frame = cv2.resize(current_frame, (int(current_frame.shape[1] * args.scale_factor), int(current_frame.shape[0] * args.scale_factor)), interpolation=cv2.INTER_AREA)
        # Perform a pass of the particle filter
        particle_filter.forward(np.copy(current_frame), 1. / fps, args.resample_factor)
        # Mean particle
        outArray.append(particle_filter.mu[0])
        print(particle_filter.mu)
        print()
        # Draw Bbox and particles on the frame
        output_frame = draw_output_frame(np.copy(current_frame), particle_filter.mu)
        output_frame = draw_output_particles(output_frame, particle_filter.particles)
        output_frame = draw_output_mean_particule(output_frame, particle_filter.mu)
        output_frame = draw_frame_number(output_frame, nbframe)
        # output_frame = draw_search_area(output_frame, particle_filter.mu, particle_filter.search_area)
        cv2.imshow('Track Cuttlefish', output_frame)
        # cv2.waitKey(0)
        # Write the frame into the video file
        if args.save_video:
            outvid.write(output_frame)
        # Press Q to stop the particle filter
        if cv2.waitKey(1) & 0xFF == ord('q'):
            stop = True
    # Release the video capture and video writer objects
    cap.release()
    if args.save_video:
        np.savetxt('bboxSave.out', np.array(outArray))
        outvid.release()
    # Close all the windows
    cv2.destroyAllWindows()