Skip to content

Commit 54a70fb

Browse files
committed
Version 9.0.4.
Chris sent me this tarball today, but it dates from 2012. The CHANGELOG is short ("Fixed yaap measure (Stephen Robertson)") but there seem to be a lot of changes in a lot of files from 9.0a3, including several measure files. This commit just brings in his code without even compile checking or running tests.
1 parent c0b2cc7 commit 54a70fb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+2929
-355
lines changed

Diff for: CHANGELOG

+14
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
------------------------------------------------------------------------------
22
Change Log (only recent)
33
------------------------------------------------------------------------------
4+
12/5/12 Version 9.0.4
5+
Fixed yaap measure (Stephen Robertson).
6+
12/5/12 Version 9.0.3
7+
Added yaap measure (Stephen Robertson).
8+
2/28/12 Version 9.0.2
9+
Cleaned up warning messages again, Corrected -c flag implementation
10+
7/14/09 Version 9.0
11+
Long past time to get this out there, despite my plans
12+
for more features! Minor cleanup of warning messages
13+
in 64bit and gcc 4.4 compilers
14+
12/31/08
15+
comments and documentation of Zscore file format corrected.
16+
trec_eval.c get_zscores.c
417
3/3/08 Version 9.0alpha.
518
Complete rewrite of entire trec_eval (needed for a long time -
619
core code was written in the 1980's and was ugly then!).
@@ -25,6 +38,7 @@ Change Log (only recent)
2538

2639
Measures added:
2740
ndcg, ndcg_cut, set_F, success, map_avgjg, P_avgjg, runid,
41+
relstring, ndcg_p
2842
various preference evaluation measures.
2943
Measures renamed:
3044
set_P was exact_prec

Diff for: Makefile

+18-12
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@ BIN = /usr/local/bin
66

77
H = .
88

9-
VERSIONID = 9.0alpha.3
9+
VERSIONID = 9.0.4
1010

1111
# gcc
1212
CC = gcc
13+
#CFLAGS = -g -I$H -Wall -DVERSIONID=\"$(VERSIONID)\"
14+
#CFLAGS = -g -I$H -Wall -DMDEBUG -DVERSIONID=\"$(VERSIONID)\"
15+
#CFLAGS = -pg -I$H -O3 -Wall -DVERSIONID=\"$(VERSIONID)\"
16+
#CFLAGS = -g -I$H -O3 -Wall -DVERSIONID=\"$(VERSIONID)\"
1317
CFLAGS = -g -I$H -Wall -DVERSIONID=\"$(VERSIONID)\"
14-
CFLAGS = -g -I$H -Wall -DMDEBUG -DVERSIONID=\"$(VERSIONID)\"
15-
CFLAGS = -pg -I$H -O3 -Wall -DVERSIONID=\"$(VERSIONID)\"
16-
CFLAGS = -g -I$H -O3 -Wall -DVERSIONID=\"$(VERSIONID)\"
1718

1819
# Other macros used in some or all makefiles
1920
INSTALL = /bin/mv
@@ -23,13 +24,16 @@ TOP_SRCS = trec_eval.c formats.c meas_init.c meas_acc.c meas_avg.c \
2324

2425
FORMAT_SRCS = get_qrels.c get_trec_results.c get_prefs.c get_qrels_prefs.c \
2526
get_qrels_jg.c form_res_rels.c form_res_rels_jg.c \
26-
form_prefs_counts.c utility_pool.c
27+
form_prefs_counts.c \
28+
utility_pool.c get_zscores.c convert_zscores.c
2729

2830
MEAS_SRCS = measures.c m_map.c m_P.c m_num_q.c m_num_ret.c m_num_rel.c \
2931
m_num_rel_ret.c m_gm_map.c m_Rprec.c m_recip_rank.c m_bpref.c \
3032
m_iprec_at_recall.c m_recall.c m_Rprec_mult.c m_utility.c m_11pt_avg.c \
31-
m_ndcg.c m_ndcg_cut.c m_ndcg_p.c m_rel_P.c m_success.c m_infap.c \
32-
m_gm_bpref.c m_runid.c \
33+
m_ndcg.c m_ndcg_cut.c m_Rndcg.c m_ndcg_rel.c \
34+
m_binG.c m_G.c \
35+
m_rel_P.c m_success.c m_infap.c m_map_cut.c \
36+
m_gm_bpref.c m_runid.c m_relstring.c \
3337
m_set_P.c m_set_recall.c m_set_rel_P.c m_set_map.c m_set_F.c \
3438
m_num_nonrel_judged_ret.c \
3539
m_prefs_num_prefs_poss.c m_prefs_num_prefs_ful.c \
@@ -38,7 +42,7 @@ MEAS_SRCS = measures.c m_map.c m_P.c m_num_q.c m_num_ret.c m_num_rel.c \
3842
m_prefs_simp_ret.c m_prefs_pair_ret.c m_prefs_avgjg_ret.c\
3943
m_prefs_avgjg_Rnonrel_ret.c \
4044
m_prefs_simp_imp.c m_prefs_pair_imp.c m_prefs_avgjg_imp.c\
41-
m_map_avgjg.c m_Rprec_mult_avgjg.c m_P_avgjg.c
45+
m_map_avgjg.c m_Rprec_mult_avgjg.c m_P_avgjg.c m_yaap.c
4246

4347
SRCS = $(TOP_SRCS) $(FORMAT_SRCS) $(MEAS_SRCS)
4448

@@ -57,11 +61,12 @@ quicktest: trec_eval
5761
./trec_eval -m all_trec -q test/qrels.test test/results.test | diff - test/out.test.aq
5862
./trec_eval -m all_trec -q -c test/qrels.test test/results.trunc | diff - test/out.test.aqc
5963
./trec_eval -m all_trec -q -c -M100 test/qrels.test test/results.trunc | diff - test/out.test.aqcM
60-
./trec_eval -m all_trec -q -l2 test/qrels.rel_level test/results.test | diff - test/out.test.aql
64+
./trec_eval -m all_trec -mrelstring.20 -q -l2 test/qrels.rel_level test/results.test | diff - test/out.test.aql
6165
./trec_eval -m all_prefs -q -R prefs test/prefs.test test/prefs.results.test | diff - test/out.test.prefs
6266
./trec_eval -m all_prefs -q -R qrels_prefs test/qrels.test test/results.test | diff - test/out.test.qrels_prefs
6367
./trec_eval -m qrels_jg -q -R qrels_jg test/qrels.123 test/results.test | diff - test/out.test.qrels_jg
64-
./trec_eval -q -miprec_at_recall..10,.20,.25,.75,.50 -m P.5,7,3 -m recall.20,2000 -m Rprec_mult.5.0,0.2,0.35 -mutility.2,-1,0,0 -m 11pt_avg..25,.5,.75 -mndcg_p.1=3,2=9,4=4.5 -mndcg_cut.10,20,23.4 -msuccess.2,5,20 test/qrels.test test/results.test | diff - test/out.test.meas_params
68+
./trec_eval -q -miprec_at_recall..10,.20,.25,.75,.50 -m P.5,7,3 -m recall.20,2000 -m Rprec_mult.5.0,0.2,0.35 -mutility.2,-1,0,0 -m 11pt_avg..25,.5,.75 -mndcg.1=3,2=9,4=4.5 -mndcg_cut.10,20,23.4 -msuccess.2,5,20 test/qrels.test test/results.test | diff - test/out.test.meas_params
69+
./trec_eval -q -m all_trec -Z test/zscores_file test/qrels.test test/results.test | diff - test/out.test.aqZ
6570
/bin/echo "Test succeeeded"
6671

6772
longtest: trec_eval
@@ -71,11 +76,12 @@ longtest: trec_eval
7176
./trec_eval -m all_trec -q test/qrels.test test/results.test > test.long/out.test.aq
7277
./trec_eval -m all_trec -q -c test/qrels.test test/results.trunc > test.long/out.test.aqc
7378
./trec_eval -m all_trec -q -c -M100 test/qrels.test test/results.trunc > test.long/out.test.aqcM
74-
./trec_eval -m all_trec -q -l2 test/qrels.rel_level test/results.test > test.long/out.test.aql
79+
./trec_eval -m all_trec -mrelstring.20 -q -l2 test/qrels.rel_level test/results.test > test.long/out.test.aql
7580
./trec_eval -m all_prefs -q -R prefs test/prefs.test test/prefs.results.test > test.long/out.test.prefs
7681
./trec_eval -m all_prefs -q -R qrels_prefs test/qrels.test test/results.test > test.long/out.test.qrels_prefs
7782
./trec_eval -m qrels_jg -q -R qrels_jg test/qrels.123 test/results.test > test.long/out.test.qrels_jg
78-
./trec_eval -q -miprec_at_recall..10,.20,.25,.75,.50 -m P.5,7,3 -m recall.20,2000 -m Rprec_mult.5.0,0.2,0.35 -mutility.2,-1,0,0 -m 11pt_avg..25,.5,.75 -mndcg_p.1=3,2=9,4=4.5 -mndcg_cut.10,20,23.4 -msuccess.2,5,20 test/qrels.test test/results.test > test.long/out.test.meas_params
83+
./trec_eval -q -miprec_at_recall..10,.20,.25,.75,.50 -m P.5,7,3 -m recall.20,2000 -m Rprec_mult.5.0,0.2,0.35 -mutility.2,-1,0,0 -m 11pt_avg..25,.5,.75 -mndcg.1=3,2=9,4=4.5 -mndcg_cut.10,20,23.4 -msuccess.2,5,20 test/qrels.test test/results.test > test.long/out.test.meas_params
84+
./trec_eval -q -m all_trec -Z test/zscores_file test/qrels.test test/results.test > test.long/out.test.aqZ
7985
diff test.long test
8086

8187
$(BIN)/trec_eval: trec_eval

Diff for: README

+3
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ If you wish to output only one particular measure:
2727
------------------------------------------------------------------------------
2828
Change Log (only recent)
2929
------------------------------------------------------------------------------
30+
12/31/08
31+
comments and documentation of Zscore file format corrected.
32+
trec_eval.c get_zscores.c
3033
2/25/08 Version 9.0alpha.
3134
Complete rewrite of entire trec_eval (needed for a long time!).
3235
Complete separation of individual measure calculations -

Diff for: convert_zscores.c

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
Copyright (c) 2008 - Chris Buckley.
3+
4+
Permission is granted for use and modification of this file for
5+
research, non-commercial purposes.
6+
*/
7+
8+
#include "common.h"
9+
#include "sysfunc.h"
10+
#include "trec_eval.h"
11+
#include "functions.h"
12+
#include "trec_format.h"
13+
/*
14+
Convert all values in a trec_eval object for a single query from being
15+
a raw score into being a zscore: score expressed in units of standard
16+
deviation from a mean. Means and standard deviations from a reference
17+
set of runs are given by all_zscores.
18+
19+
If the mean and stddev values for this measure and query are not found
20+
in all_zscores, then the value is set to MISSING_ZSCORE_VALUE.
21+
*/
22+
23+
static ZSCORES *find_qid (const ALL_ZSCORES *all_zscores, const char *qid);
24+
static ZSCORE_QID *find_meas (const ZSCORES *zscores, const char *meas);
25+
26+
int
27+
te_convert_to_zscore (const ALL_ZSCORES *all_zscores, TREC_EVAL *q_eval)
28+
{
29+
int return_value = 1;
30+
long i;
31+
ZSCORES *zscores;
32+
ZSCORE_QID *zscores_qid;
33+
34+
/* Do a binary search to find q_eval->qid */
35+
if (NULL == (zscores = find_qid (all_zscores, q_eval->qid))) {
36+
/* If q_eval->qid not found, set all values in q_eval to
37+
MISSING_ZSCORE_VALUE and return 0 */
38+
for (i = 0; i < q_eval->num_values; i++)
39+
q_eval->values[i].value = MISSING_ZSCORE_VALUE;
40+
return (0);
41+
}
42+
43+
/* For each measure in q_eval, do a binary search for that measure
44+
within zscores */
45+
for (i = 0; i < q_eval->num_values; i++) {
46+
if (NULL == (zscores_qid = find_meas (zscores,
47+
q_eval->values[i].name))) {
48+
q_eval->values[i].value = MISSING_ZSCORE_VALUE;
49+
return_value = 0;
50+
}
51+
else {
52+
if (zscores_qid->stddev)
53+
q_eval->values[i].value =
54+
(q_eval->values[i].value - zscores_qid->mean) /
55+
zscores_qid->stddev;
56+
else {
57+
if (q_eval->values[i].value == zscores_qid->mean)
58+
q_eval->values[i].value = 0;
59+
else {
60+
q_eval->values[i].value = MISSING_ZSCORE_VALUE;
61+
return_value = 0;
62+
}
63+
}
64+
}
65+
}
66+
67+
return (return_value);
68+
}
69+
70+
static ZSCORES *
71+
find_qid (const ALL_ZSCORES *all_zscores, const char *qid)
72+
{
73+
ZSCORES *base;
74+
long start, end, current;
75+
int result;
76+
77+
base = all_zscores->q_zscores;
78+
end = all_zscores->num_q_zscores;
79+
start = 0;
80+
while (start <= end) {
81+
current = (start + end) / 2;
82+
result = strcmp (qid, base[current].qid);
83+
if (result < 0)
84+
end = current - 1;
85+
else if (result > 0)
86+
start = current + 1;
87+
else
88+
return (&base[current]);
89+
}
90+
return (NULL);
91+
}
92+
93+
static ZSCORE_QID *
94+
find_meas (const ZSCORES *zscores, const char *meas)
95+
{
96+
ZSCORE_QID *base;
97+
long start, end, current;
98+
int result;
99+
100+
base = zscores->zscores;
101+
end = zscores->num_zscores;
102+
start = 0;
103+
while (start <= end) {
104+
current = (start + end) / 2;
105+
result = strcmp (meas, base[current].meas);
106+
if (result < 0)
107+
end = current - 1;
108+
else if (result > 0)
109+
start = current + 1;
110+
else
111+
return (&base[current]);
112+
}
113+
return (NULL);
114+
}
115+

Diff for: functions.h

+9-5
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@ void * te_chk_and_malloc (void *ptr, long *current_bound,
1313
const long needed, const size_t size);
1414
void * te_chk_and_realloc (void *ptr, long *current_bound,
1515
const long needed, const int size);
16-
16+
/* Functions for dealing with zscores */
17+
int te_get_zscores (const EPI *epi, const char *zscores_file,
18+
ALL_ZSCORES *zscores);
19+
int te_get_zscores_cleanup ();
20+
int te_convert_to_zscore (const ALL_ZSCORES *all_zscores, TREC_EVAL *q_eval);
1721

1822
/* ------------------- Generic Routines for Measures ------------------------ */
1923

@@ -51,16 +55,16 @@ int te_acc_meas_a_cut (const EPI *epi, const TREC_MEAS *tm,
5155
/* Code is in meas_calc_avg.c */
5256
/* Measure does not require final averaging */
5357
int te_calc_avg_meas_empty (const EPI *epi, const TREC_MEAS *tm,
54-
TREC_EVAL *eval);
58+
const ALL_REL_INFO *all_rel_info, TREC_EVAL *eval);
5559
/* Measure is a single value with averaging */
5660
int te_calc_avg_meas_s (const EPI *epi, const TREC_MEAS *tm,
57-
TREC_EVAL *accum_eval);
61+
const ALL_REL_INFO *all_rel_info, TREC_EVAL *accum_eval);
5862
/* Measure is an array of values (one per cutoff) with averaging */
5963
int te_calc_avg_meas_a_cut (const EPI *epi, const TREC_MEAS *tm,
60-
TREC_EVAL *accum_eval);
64+
const ALL_REL_INFO *all_rel_info, TREC_EVAL *accum_eval);
6165
/* Measure is a single value using geometric mean */
6266
int te_calc_avg_meas_s_gm (const EPI *epi, const TREC_MEAS *tm,
63-
TREC_EVAL *accum_eval);
67+
const ALL_REL_INFO *all_rel_info, TREC_EVAL *accum_eval);
6468

6569
/* ----- Print single query for measure ---- */
6670
/* Code is in meas_print_single.c */

Diff for: get_prefs.c

+6-3
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,10 @@ For example:
5757
qid1 ujg1 sub2 docno2 1.0
5858
qid1 ujg1 sub2 docno4 3.0
5959
expresses 5 preferences (1>2, 1>3, 2 > 3, 4>1, 4>2). Note the duplicate
60-
1 > 2 is not counted as a separate preference
60+
1 > 2 is not counted as a separate preference.
61+
A conventional pairwise preference file with no transitivity could be converted
62+
into this form, with two entries per JSG, the preferred doc with a rel of 2.0 and
63+
the non-preferred doc with a rel of 1.0.
6164
6265
Multiple users are indicated by different JGs.
6366
For example:
@@ -183,8 +186,8 @@ te_get_prefs (EPI *epi, char *text_prefs_file, ALL_REL_INFO *all_rel_info)
183186
if (UNDEF == parse_prefs_line (&ptr, &line_ptr->qid, &line_ptr->jg,
184187
&line_ptr->jsg, &line_ptr->docno,
185188
&line_ptr->rel)) {
186-
fprintf (stderr, "trec_eval.get_prefs: Malformed line %d\n",
187-
line_ptr - lines + 1);
189+
fprintf (stderr, "trec_eval.get_prefs: Malformed line %ld\n",
190+
(long) (line_ptr - lines + 1));
188191
return (UNDEF);
189192
}
190193
line_ptr++;

Diff for: get_qrels.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ te_get_qrels (EPI *epi, char *text_qrels_file, ALL_REL_INFO *all_rel_info)
122122
while (*ptr) {
123123
if (UNDEF == parse_qrels_line (&ptr, &line_ptr->qid,
124124
&line_ptr->docno, &line_ptr->rel)) {
125-
fprintf (stderr, "trec_eval.get_qrels: Malformed line %d\n",
126-
line_ptr - lines + 1);
125+
fprintf (stderr, "trec_eval.get_qrels: Malformed line %ld\n",
126+
(long) (line_ptr - lines + 1));
127127
return (UNDEF);
128128
}
129129
line_ptr++;

Diff for: get_qrels_jg.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ te_get_qrels_jg (EPI *epi, char *text_qrels_file, ALL_REL_INFO *all_rel_info)
131131
while (*ptr) {
132132
if (UNDEF == parse_qrels_line (&ptr, &line_ptr->qid, &line_ptr->jg,
133133
&line_ptr->docno, &line_ptr->rel)) {
134-
fprintf (stderr, "trec_eval.get_qrels_jg: Malformed line %d\n",
135-
line_ptr - lines + 1);
134+
fprintf (stderr, "trec_eval.get_qrels_jg: Malformed line %ld\n",
135+
(long) (line_ptr - lines + 1));
136136
return (UNDEF);
137137
}
138138
line_ptr++;

Diff for: get_qrels_prefs.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,8 @@ te_get_qrels_prefs (EPI *epi, char *text_prefs_file, ALL_REL_INFO *all_rel_info)
156156
while (*ptr) {
157157
if (UNDEF == parse_qrels_prefs_line (&ptr, &line_ptr->qid,&line_ptr->jg,
158158
&line_ptr->docno, &line_ptr->rel)){
159-
fprintf (stderr, "trec_eval.get_qrels_prefs: Malformed line %d\n",
160-
line_ptr - lines + 1);
159+
fprintf (stderr, "trec_eval.get_qrels_prefs: Malformed line %ld\n",
160+
(long) (line_ptr - lines + 1));
161161
return (UNDEF);
162162
}
163163
line_ptr++;

Diff for: get_trec_results.c

+28-11
Original file line numberDiff line numberDiff line change
@@ -54,31 +54,48 @@ te_get_trec_results (EPI *epi, char *text_results_file,
5454
ALL_RESULTS *all_results)
5555
{
5656
int fd;
57-
int size = 0;
57+
char *orig_buf;
58+
size_t size = 0;
5859
char *ptr;
5960
char *current_qid;
6061
long i;
6162
LINES *lines;
6263
LINES *line_ptr;
63-
long num_lines;
64+
size_t num_lines;
6465
long num_qid;
6566
char *run_id_ptr = NULL;
6667
/* current pointers into static pools above */
6768
RESULTS *q_results_ptr;
6869
TEXT_RESULTS_INFO *text_info_ptr;
6970
TEXT_RESULTS *text_results_ptr;
7071

71-
/* Read entire file into memory */
72+
/* mmap entire file into memory and copy it into writable memory */
7273
if (-1 == (fd = open (text_results_file, 0)) ||
7374
0 >= (size = lseek (fd, 0L, 2)) ||
74-
NULL == (trec_results_buf = malloc ((unsigned) size+2)) ||
75-
-1 == lseek (fd, 0L, 0) ||
76-
size != read (fd, trec_results_buf, size) ||
77-
-1 == close (fd)) {
78-
fprintf (stderr,
75+
(char *) -1 == (orig_buf = (char *) mmap ((caddr_t) 0,
76+
(size_t) size,
77+
PROT_READ,
78+
MAP_SHARED,
79+
fd,
80+
(off_t) 0))) {
81+
fprintf (stderr,
7982
"trec_eval.get_results: Cannot read results file '%s'\n",
8083
text_results_file);
81-
return (UNDEF);
84+
return (UNDEF);
85+
}
86+
if (NULL == (trec_results_buf = malloc ((size_t) size+2))) {
87+
fprintf (stderr,
88+
"trec_eval.get_results: Cannot copy results file '%s'\n",
89+
text_results_file);
90+
return (UNDEF);
91+
}
92+
(void) memcpy (trec_results_buf, orig_buf, size);
93+
if (-1 == munmap (orig_buf, size) ||
94+
-1 == close (fd)) {
95+
fprintf (stderr,
96+
"trec_eval.get_results: Cannot close results file '%s'\n",
97+
text_results_file);
98+
return (UNDEF);
8299
}
83100

84101
/* Append ending newline if not present, Append NULL terminator */
@@ -108,8 +125,8 @@ te_get_trec_results (EPI *epi, char *text_results_file,
108125
}
109126
if (UNDEF == parse_results_line (&ptr, &line_ptr->qid,&line_ptr->docno,
110127
&line_ptr->sim, &run_id_ptr)) {
111-
fprintf (stderr, "trec_eval.get_results: Malformed line %d\n",
112-
line_ptr - lines + 1);
128+
fprintf (stderr, "trec_eval.get_results: Malformed line %ld\n",
129+
(long) (line_ptr - lines + 1));
113130
return (UNDEF);
114131
}
115132
line_ptr++;

0 commit comments

Comments
 (0)