Skip to content

Commit 02026c7

Browse files
committed
Mar 6th
Revision for README and the codes in tc-ssn.
1 parent 3eda4a6 commit 02026c7

38 files changed

+110
-225
lines changed

README.md

100755100644
+6-1
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,13 @@ Note that, these methods use frame-wise fisher vector as video representation, w
1717

1818
### References
1919
[1] Y. Zhao, Y. Xiong, L. Wang, Z. Wu, X. Tang, and D. Lin. Temporal action detection with structured segment networks. In ICCV, pages 2933–2942, 2017.
20+
2021
[2] H. Xu, A. Das, and K. Saenko. R-C3D: region convolutional 3d network for temporal activity detection. In ICCV, pages 5794–5803, 2017.
22+
2123
[3] A. Richard, H. Kuehne, and J. Gall. Action sets: Weakly supervised action segmentation without ordering constraints. In CVPR, pages 5987–5996, 2018.
24+
2225
[4] A. Richard, H. Kuehne, A. Iqbal, and J. Gall. Neuralnetwork-viterbi: A framework for weakly supervised video learning. In CVPR, pages 7386–7395, 2018.
26+
2327
[5] L. Ding and C. Xu. Weakly-supervised action segmentation with iterative soft boundary assignment. In CVPR, pages 6508–6516, 2018.
24-
[6] J. Donahue, L. A. Hendricks, M. Rohrbach, S. Venugopalan, S. Guadarrama, K. Saenko, and T. Darrell. Long-term recurrent convolutional networks for visual recognition and description. TPAMI, 39(4):677–691, 2017.
28+
29+
[6] J. Donahue, L. A. Hendricks, M. Rohrbach, S. Venugopalan, S. Guadarrama, K. Saenko, and T. Darrell. Long-term recurrent convolutional networks for visual recognition and description. TPAMI, 39(4):677–691, 2017.

tc-ssn/README.md

100755100644
+15-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,20 @@
99
- terminaltables 3.1.0
1010
- pandas 0.23.4
1111

12+
### The structure of SSN score file
13+
14+
The score file dumped by SSN is in `pkl` format. It is serialised from a Python `dict` in which the paths of the video frames serve as keys and 4-element tuples of numpy arrays serve as values. The meaning of the four arrays is as follows:
15+
16+
* The shape of the 1st array in the tuple is (N,2), where N denotes the number of proposals. The elements of this array indicate the lower and upper bounds of the proposal ranges.
17+
* The shape of the 2nd array in the tuple is (N,K+1), where K denotes the number of action classes. This array holds the actionness scores.
18+
* The shape of the 3rd array in the tuple is (N,K). This array holds the completeness scores produced by SSN.
19+
* The shape of the 4th array in the tuple is (N,K,2). This array holds the regression scores. Each regression score is given as a 2-element array \[`center_regression`, `duration_regression`\]. The regression operation can be formulated as:
20+
21+
```
22+
regressed_center = range_center+range_duration*center_regression
23+
regressed_duration = range_duration*exp(duration_regression)
24+
```
25+
1226
### Get combined score file
1327

1428
The standalone score file of combined scores is required when refining the combined scores of the RGB and Flow modalities. Programs derived from the original evaluation program are used to export the combined scores to a standalone `pkl` file. These programs are `fusion_pkl_generation_eval_detection_results.py` and `fusion_eval_detection_results.py`. Either program exports the same `pkl` file.
@@ -49,4 +63,4 @@ python3 combined_refine.py -c <npy_constrains> -i <src_scores> -o <refined_score
4963

5064
```sh
5165
python3 combined_eval_detection_results.py coin_small <combined_score> --externel_score <external_score>
52-
```
66+
```

tc-ssn/anet_toolkit/.gitignore

100755100644
File mode changed.

tc-ssn/anet_toolkit/Evaluation/eval_detection.py

100755100644
File mode changed.

tc-ssn/anet_toolkit/Evaluation/utils.py

100755100644
File mode changed.

tc-ssn/combined_eval_detection_results.py

+15-27
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@
4343
num_class = dataset_configs['num_class']
4444
test_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])
4545
evaluate.number_label = num_class
46-
# print('hhh')
47-
# print(test_prop_file)
4846

4947
nms_threshold = args.nms_threshold if args.nms_threshold else dataset_configs['evaluation']['nms_threshold']
5048
top_k = args.top_k if args.top_k else dataset_configs['evaluation']['top_k']
@@ -125,7 +123,7 @@ def gen_detection_results(video_id, score_tp):
125123

126124
# load combined scores from external numpys
127125
ex_vid = video_id.split("/")[-1]
128-
ex_scores = np.load(os.path.join(args.externel_score,ex_vid + ".npy"))
126+
ex_scores = np.load(os.path.join(args.externel_score,"proposal_" + ex_vid + ".npy"))
129127
combined_scores = ex_scores[:,:,4]
130128

131129
keep_idx = np.argsort(combined_scores.ravel())[-top_k:]
@@ -245,49 +243,39 @@ def callback(rst):
245243
ap_values[rst[0], rst[1]] = rst[2][0]
246244
ar_values[rst[0], rst[1]] = rst[2][1]
247245

248-
zdy_miou = np.zeros((num_class,))
246+
zdy_miou = np.zeros((num_class,)) # used to store the mIoU of each classes
249247

250-
pku_gt_by_class = [[] for i in range(num_class)]
251-
pku_prediction_by_class = [[] for i in range(num_class)]
252-
pku_gt = []
253-
pku_prediction = []
248+
gt_by_class = [[] for i in range(num_class)]
249+
prediction_by_class = [[] for i in range(num_class)]
250+
gt = []
251+
prediction = []
254252
for cls in range(num_class):
255253
for zdy_record in gt_by_cls[cls].itertuples():
256-
pku_gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
257-
pku_gt += pku_gt_by_class[cls]
254+
gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
255+
gt += gt_by_class[cls]
258256
for zdy_record in plain_detections[cls].itertuples():
259-
pku_prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
260-
pku_prediction += pku_prediction_by_class[cls]
257+
prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
258+
prediction += prediction_by_class[cls]
261259
if cls!=0:
262-
zdy_miou[cls] = evaluate.miou(pku_prediction_by_class[cls],pku_gt_by_class[cls])
260+
zdy_miou[cls] = evaluate.miou(prediction_by_class[cls],gt_by_class[cls])
263261
miou = zdy_miou[1:].mean()
264262

265-
print(str(len(pku_gt)))
266-
print(str(len(pku_prediction)))
263+
print(str(len(gt)))
264+
print(str(len(prediction)))
267265

268266
f1_values = np.zeros((len(iou_range),))
269267

270268
pool = Pool(args.ap_workers)
271269
jobs = []
272270
for iou_idx, min_overlap in enumerate(iou_range):
273-
#for iou_idx, min_overlap in enumerate([0.6]):
274271
for cls in range(num_class):
275-
#for cls in [304]:
276-
#jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_cls[cls], plain_detections[cls],),callback=callback))
277-
jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, pku_gt_by_class[cls], pku_prediction_by_class[cls],),callback=callback))
278-
f1 = evaluate.f1(pku_prediction,min_overlap,pku_gt)
272+
jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_class[cls], prediction_by_class[cls],),callback=callback))
273+
f1 = evaluate.f1(prediction,min_overlap,gt)
279274
f1_values[iou_idx] = f1
280275
pool.close()
281276
pool.join()
282277
print("Evaluation done.\n\n")
283278

284-
"""for zdy_i,zdy_iou in enumerate(iou_range):
285-
with open("accuracy_per_cls/cls_pku{:f}.txt".format(zdy_iou),"w") as zdy_f:
286-
for zdy_cls in range(num_class):
287-
zdy_f.write("{:d}\t{:.04f}\n".format(zdy_cls,ap_values[zdy_cls][zdy_i]))"""
288-
289-
#map_iou = ap_values[1:,:].mean(axis=0)
290-
#mar = ar_values[1:,:].mean(axis=0)
291279
map_iou = ap_values.mean(axis=0)
292280
mar = ar_values.mean(axis=0)
293281
display_title = "Detection Performance on {}".format(args.dataset)

tc-ssn/combined_refine.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
#!/usr/bin/python3
22

3-
#import json
3+
"""
4+
Refine the scores combined from the actionness and completeness scores output by SSN.
5+
6+
Last revision: Danyang Zhang @THU_IVG @Mar 6th, 2019 CST
7+
"""
8+
49
import numpy as np
510
import os
611
import os.path
@@ -14,7 +19,7 @@
1419
parser.add_argument("--target","-o",action="store",type=str,default="test_gt_score_combined_refined_fusion")
1520
args = parser.parse_args()
1621

17-
constraints = np.load(args.constraints)
22+
constraints = np.load(args.constraints) # constraints matrix
1823
target_class_count,action_class_count = constraints.shape
1924

2025
numpy_dir = args.src_score
@@ -32,17 +37,18 @@
3237
vid = np_file[np_file.find("_")+1:np_file.rfind(".")]
3338
premat = np.load(os.path.join(numpy_dir,np_file))
3439
combined = premat[:,:,4]
35-
#print(str(combined.shape))
3640
video_combined = np.sum(combined,axis=0)
3741
target_class_combined = np.zeros((target_class_count,))
3842
for target_cls in range(target_class_count):
3943
for act_cls in range(action_class_count):
4044
if constraints[target_cls][act_cls]==1:
41-
target_class_combined[target_cls] = video_combined[act_cls]
42-
probable_target_class = np.argmax(target_class_combined)
45+
target_class_combined[target_cls] += video_combined[act_cls]
46+
# aggregate the scores of the action classes under the identical task/target class
47+
probable_target_class = np.argmax(target_class_combined) # infer the probable task class
4348
mask = np.full(combined.shape,math.exp(-2))
4449
mask[:,0] = 1
4550
mask[:,np.where(constraints[probable_target_class])[0]] = 1
4651
combined *= mask
52+
# refine the combined scores
4753
premat[:,:,4] = combined
4854
np.save(os.path.join(target_dir,np_file),premat)

tc-ssn/data/coin_small_tag_train_proposal_list.txt

100755100644
File mode changed.

tc-ssn/data/coin_small_tag_val_proposal_list.txt

100755100644
File mode changed.

tc-ssn/data/dataset_cfg.yaml

100755100644
File mode changed.

tc-ssn/data/reference_models.yaml

100755100644
File mode changed.

tc-ssn/data_processing.py

+16-23
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
#!/usr/bin/python3
22

3+
"""
4+
Transfer the pkl scores to npy.
5+
6+
Last revision: Danyang Zhang @THU_IVG @Mar 6th, 2019 CST
7+
"""
8+
39
import numpy as np
410
import json
511
import os
@@ -23,49 +29,36 @@
2329

2430
for v in scores:
2531
vid = v.split("/")[-1]
26-
#video_duration = annotations[vid]["end"]-annotations[vid]["start"]
2732
video_duration = annotations[vid]["duration"]
2833

2934
proposals = scores[v][0]
3035
actionness = scores[v][1]
3136
completeness = scores[v][2]
3237
regression = scores[v][3]
3338

34-
score_max = np.max(actionness[:,1:],axis=-1)
35-
exp_score = np.exp(actionness[:,1:]-score_max[...,None])
39+
score_max = np.max(actionness,axis=-1)
40+
exp_score = np.exp(actionness-score_max[...,None])
3641
exp_com = np.exp(completeness)
37-
combined_scores = (exp_score/np.sum(exp_score,axis=-1)[...,None])*exp_com
42+
combined_scores = (exp_score/np.sum(exp_score,axis=-1)[...,None])[:,1:]*exp_com
43+
# combined scores are calculated as softmax(actionness)*exp(completeness) according to the code offered by SSN
3844

3945
proposal_count = len(proposals)
4046
class_count = completeness.shape[1]
4147
proposal_npy = np.zeros((proposal_count,class_count,7))
48+
# the columns in proposal_npy:
49+
# start of the proposal range, end of the proposal range, exp(actionness), exp(completeness), combined score, actionness, completeness
50+
4251
for i in range(proposal_count):
4352
start = proposals[i][0]*video_duration
4453
end = proposals[i][1]*video_duration
4554

4655
for c in range(class_count):
47-
center_proportion = (proposals[i][0]+proposals[i][1])/2.
48-
duration_proportion = proposals[i][1]-proposals[i][0]
49-
center_proportion += regression[i][c][0]*duration_proportion
50-
duration_proportion *= math.exp(regression[i][c][1])
51-
start_proportion = center_proportion-duration_proportion/2.
52-
end_proportion = center_proportion+duration_proportion/2.
53-
start_proportion = max(start_proportion,0.)
54-
start_proportion = min(start_proportion,1.)
55-
end_proportion = max(end_proportion,0.)
56-
end_proportion = min(end_proportion,1.)
57-
#pre_cls["regressed_interval"] = (start_proportion*video_duration,end_proportion*video_duration)
58-
59-
proposal_npy[i][c][0] = start_proportion*video_duration
60-
proposal_npy[i][c][1] = end_proportion*video_duration
61-
proposal_npy[i][c][2] = exp_score[i][c]
56+
proposal_npy[i][c][0] = proposals[i][0]
57+
proposal_npy[i][c][1] = proposals[i][1]
58+
proposal_npy[i][c][2] = exp_score[i][c+1]
6259
proposal_npy[i][c][3] = exp_com[i][c]
6360
proposal_npy[i][c][4] = combined_scores[i][c]
6461
proposal_npy[i][c][5] = actionness[i][c+1]
6562
proposal_npy[i][c][6] = completeness[i][c]
66-
6763
npy_name = os.path.join(output_prefix,"proposal_" + vid)
6864
np.save(npy_name,proposal_npy)
69-
np.save(npy_name + "_groundtruth",groundtruth_npy)
70-
#prediction_dict[vid]["prediction_numpy"] = npy_name + ".npy"
71-
#prediction_dict[vid]["groundtruth_numpy"] = npy_name + "_groundtruth" + ".npy"

tc-ssn/eval_detection_results.py

+13-25
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@
4040
num_class = dataset_configs['num_class']
4141
test_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])
4242
evaluate.number_label = num_class
43-
# print('hhh')
44-
# print(test_prop_file)
4543

4644
nms_threshold = args.nms_threshold if args.nms_threshold else dataset_configs['evaluation']['nms_threshold']
4745
top_k = args.top_k if args.top_k else dataset_configs['evaluation']['top_k']
@@ -239,47 +237,37 @@ def callback(rst):
239237

240238
zdy_miou = np.zeros((num_class,))
241239

242-
pku_gt_by_class = [[] for i in range(num_class)]
243-
pku_prediction_by_class = [[] for i in range(num_class)]
244-
pku_gt = []
245-
pku_prediction = []
240+
gt_by_class = [[] for i in range(num_class)]
241+
prediction_by_class = [[] for i in range(num_class)]
242+
gt = []
243+
prediction = []
246244
for cls in range(num_class):
247245
for zdy_record in gt_by_cls[cls].itertuples():
248-
pku_gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
249-
pku_gt += pku_gt_by_class[cls]
246+
gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
247+
gt += gt_by_class[cls]
250248
for zdy_record in plain_detections[cls].itertuples():
251-
pku_prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
252-
pku_prediction += pku_prediction_by_class[cls]
249+
prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
250+
prediction += prediction_by_class[cls]
253251
if cls!=0:
254-
zdy_miou[cls] = evaluate.miou(pku_prediction_by_class[cls],pku_gt_by_class[cls])
252+
zdy_miou[cls] = evaluate.miou(prediction_by_class[cls],gt_by_class[cls])
255253
miou = zdy_miou[1:].mean()
256254

257-
print(str(len(pku_gt)))
258-
print(str(len(pku_prediction)))
255+
print(str(len(gt)))
256+
print(str(len(prediction)))
259257

260258
f1_values = np.zeros((len(iou_range),))
261259

262260
pool = Pool(args.ap_workers)
263261
jobs = []
264262
for iou_idx, min_overlap in enumerate(iou_range):
265-
#for iou_idx, min_overlap in enumerate([0.6]):
266263
for cls in range(num_class):
267-
#for cls in [304]:
268-
#jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_cls[cls], plain_detections[cls],),callback=callback))
269-
jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, pku_gt_by_class[cls], pku_prediction_by_class[cls],),callback=callback))
270-
f1 = evaluate.f1(pku_prediction,min_overlap,pku_gt)
264+
jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_class[cls], prediction_by_class[cls],),callback=callback))
265+
f1 = evaluate.f1(prediction,min_overlap,gt)
271266
f1_values[iou_idx] = f1
272267
pool.close()
273268
pool.join()
274269
print("Evaluation done.\n\n")
275270

276-
"""for zdy_i,zdy_iou in enumerate(iou_range):
277-
with open("accuracy_per_cls/cls_pku{:f}.txt".format(zdy_iou),"w") as zdy_f:
278-
for zdy_cls in range(num_class):
279-
zdy_f.write("{:d}\t{:.04f}\n".format(zdy_cls,ap_values[zdy_cls][zdy_i]))"""
280-
281-
#map_iou = ap_values[1:,:].mean(axis=0)
282-
#mar = ar_values[1:,:].mean(axis=0)
283271
map_iou = ap_values.mean(axis=0)
284272
mar = ar_values.mean(axis=0)
285273
display_title = "Detection Performance on {}".format(args.dataset)

0 commit comments

Comments
 (0)