Skip to content

Commit 8cae899

Browse files
committed
Merge pull request #285 from bosilca/master
Reenable high accuracy timers
2 parents f48b901 + 43901fa commit 8cae899

File tree

10 files changed

+109
-90
lines changed

10 files changed

+109
-90
lines changed

ompi/mpi/c/wtick.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ double MPI_Wtick(void)
4040
{
4141
OPAL_CR_NOOP_PROGRESS();
4242

43-
#if OPAL_TIMER_USEC_NATIVE
43+
#if OPAL_TIMER_CYCLE_NATIVE
44+
return opal_timer_base_get_freq();
45+
#elif OPAL_TIMER_USEC_NATIVE
4446
return 0.000001;
4547
#else
4648
/* Otherwise, we already return usec precision. */

ompi/mpi/c/wtime.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ double MPI_Wtime(void)
4040
{
4141
double wtime;
4242

43-
#if OPAL_TIMER_USEC_NATIVE
43+
#if OPAL_TIMER_CYCLE_NATIVE
44+
wtime = ((double) opal_timer_base_get_cycles()) / opal_timer_base_get_freq();
45+
#elif OPAL_TIMER_USEC_NATIVE
4446
wtime = ((double) opal_timer_base_get_usec()) / 1000000.0;
4547
#else
4648
/* Fall back to gettimeofday() if we have nothing else */

opal/include/opal/sys/amd64/timer.h

+22-17
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -25,29 +25,34 @@ typedef uint64_t opal_timer_t;
2525

2626
#if OPAL_GCC_INLINE_ASSEMBLY
2727

28-
#if 0
28+
/**
29+
* http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
30+
*/
2931
static inline opal_timer_t
3032
opal_sys_timer_get_cycles(void)
3133
{
32-
opal_timer_t ret;
33-
34-
__asm__ __volatile__("rdtsc" : "=A"(ret));
35-
36-
return ret;
37-
}
38-
34+
unsigned a, d;
35+
#if 0
36+
__asm__ __volatile__ ("cpuid\n\t"
37+
"rdtsc\n\t"
38+
: "=a" (a), "=d" (d)
39+
:: "rbx", "rcx");
3940
#else
40-
41-
static inline opal_timer_t
42-
opal_sys_timer_get_cycles(void)
43-
{
44-
unsigned a, d;
45-
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
41+
/* If we need higher accuracy we should implement the algorithm proposed
42+
* on the Intel document referenced above. However, in the context of MPI
43+
* this function will be used as the backend for MPI_Wtime and as such
44+
* can afford a small inaccuracy.
45+
*/
46+
__asm__ __volatile__ ("rdtscp\n\t"
47+
"mov %%edx, %0\n\t"
48+
"mov %%eax, %1\n\t"
49+
"cpuid\n\t"
50+
: "=r" (a), "=r" (d)
51+
:: "rax", "rbx", "rcx", "rdx");
52+
#endif
4653
return ((opal_timer_t)a) | (((opal_timer_t)d) << 32);
4754
}
4855

49-
#endif
50-
5156
#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
5257

5358
#else

opal/include/opal/sys/ia32/timer.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -30,7 +30,10 @@ opal_sys_timer_get_cycles(void)
3030
{
3131
opal_timer_t ret;
3232

33-
__asm__ __volatile__("rdtsc" : "=A"(ret));
33+
__asm__ __volatile__("cpuid\n"
34+
"rdtsc\n"
35+
: "=A"(ret)
36+
:: "ebx", "ecx", "edx");
3437

3538
return ret;
3639
}

opal/mca/timer/aix/timer_aix.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2006 The University of Tennessee and The University
5+
* Copyright (c) 2004-2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -39,7 +39,7 @@ opal_timer_base_get_usec()
3939
retval = (t.tb_high * 1000000) + t.tb_low / 1000;
4040

4141
return retval;
42-
}
42+
}
4343

4444
static inline opal_timer_t
4545
opal_timer_base_get_cycles()

opal/mca/timer/altix/timer_altix.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -36,7 +36,7 @@ static inline opal_timer_t
3636
opal_timer_base_get_usec(void)
3737
{
3838
return opal_timer_base_get_cycles() / opal_timer_altix_usec_conv;
39-
}
39+
}
4040

4141

4242
static inline opal_timer_t

opal/mca/timer/darwin/timer_darwin.h

+25-10
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -26,24 +26,39 @@ typedef uint64_t opal_timer_t;
2626

2727
/* frequency in mhz */
2828
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_freq;
29+
OPAL_DECLSPEC extern mach_timebase_info_data_t opal_timer_darwin_info;
30+
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_bias;
2931

30-
32+
/**
33+
* Use the pragmatic solution proposed at
34+
* http://stackoverflow.com/questions/23378063/how-can-i-use-mach-absolute-time-without-overflowing/23378064#23378064
35+
*/
3136
static inline opal_timer_t
3237
opal_timer_base_get_cycles(void)
3338
{
34-
/* this is basically a wrapper around the "right" assembly to get
35-
the tick counter off the PowerPC Time Base. I believe it's
36-
something similar on x86 */
37-
return mach_absolute_time();
39+
uint64_t now = mach_absolute_time();
40+
41+
if( opal_timer_darwin_info.denom == 0 ) {
42+
(void)mach_timebase_info(&opal_timer_darwin_info);
43+
if( opal_timer_darwin_info.denom > 1024 ) {
44+
double frac = (double)opal_timer_darwin_info.numer/opal_timer_darwin_info.denom;
45+
opal_timer_darwin_info.denom = 1024;
46+
opal_timer_darwin_info.numer = opal_timer_darwin_info.denom * frac + 0.5;
47+
}
48+
opal_timer_darwin_bias = now;
49+
}
50+
/* this is basically a wrapper around the "right" assembly to convert
51+
the tick counter off the PowerPC Time Base into nanos. */
52+
return (now - opal_timer_darwin_bias) * opal_timer_darwin_info.numer / opal_timer_darwin_info.denom;
3853
}
3954

4055

4156
static inline opal_timer_t
4257
opal_timer_base_get_usec(void)
4358
{
4459
/* freq is in Hz, so this gives usec */
45-
return mach_absolute_time() * 1000000 / opal_timer_darwin_freq;
46-
}
60+
return opal_timer_base_get_cycles() / 1000;
61+
}
4762

4863

4964
static inline opal_timer_t
@@ -53,9 +68,9 @@ opal_timer_base_get_freq(void)
5368
}
5469

5570

56-
#define OPAL_TIMER_CYCLE_NATIVE 1
71+
#define OPAL_TIMER_CYCLE_NATIVE 0
5772
#define OPAL_TIMER_CYCLE_SUPPORTED 1
58-
#define OPAL_TIMER_USEC_NATIVE 0
73+
#define OPAL_TIMER_USEC_NATIVE 1
5974
#define OPAL_TIMER_USEC_SUPPORTED 1
6075

6176
#endif

opal/mca/timer/darwin/timer_darwin_component.c

+42-50
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -20,17 +20,16 @@
2020

2121
#include "opal_config.h"
2222

23-
#include <mach/mach_time.h>
24-
2523
#include "opal/mca/timer/timer.h"
2624
#include "opal/mca/timer/darwin/timer_darwin.h"
2725
#include "opal/constants.h"
2826

2927
opal_timer_t opal_timer_darwin_freq;
28+
mach_timebase_info_data_t opal_timer_darwin_info = {.denom = 0};
29+
opal_timer_t opal_timer_darwin_bias;
3030

3131
static int opal_timer_darwin_open(void);
3232

33-
3433
const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
3534
/* First, the mca_component_t struct containing meta information
3635
about the component itself */
@@ -53,55 +52,48 @@ const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
5352
},
5453
};
5554

56-
55+
/* mach_timebase_info() returns a fraction that can be multiplied
56+
by the difference between two calls to mach_absolute_time() to
57+
get the number of nanoseconds that passed between the two
58+
calls.
59+
60+
On PPC, mach_timebase_info returns numer = 1000000000 and denom
61+
= 33333335 (or possibly 25000000, depending on the machine).
62+
mach_absolute_time() returns a cycle count from the global
63+
clock, which runs at 25 - 33MHz, so dividing the cycle count by
64+
the frequency gives you seconds between the interval, then
65+
multiplying by 1000000000 gives you nanoseconds. Of course,
66+
you should do the multiply first, then the divide to reduce
67+
arithmetic errors due to integer math. But since we want the
68+
least amount of math in the critical path as possible and
69+
mach_absolute_time is already a cycle counter, we claim we have
70+
native cycle count support and set the frequency to be the
71+
frequency of the global clock, which is sTBI.denom *
72+
(1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or
73+
sTBI.denom.
74+
75+
On Intel, mach_timebase_info returns numer = 1 and denom = 1,
76+
meaning that mach_absolute_time() returns some global clock
77+
time in nanoseconds. Because PPC returns a frequency and
78+
returning a time in microseconds would still require math in
79+
the critical path (a divide, at that), we pretend that the
80+
nanosecond timer is instead a cycle counter for a 1GHz clock
81+
and that we're returning a cycle count natively. so sTBI.denom
82+
* (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or
83+
1000000000, meaning we have a 1GHz clock.
84+
85+
More generally, since mach_timebase_info() gives the "keys" to
86+
transition the return from mach_absolute_time() into
87+
nanoseconds, taking the reverse of that and multiplying by
88+
1000000000 will give you a frequency in cycles / second if you
89+
think of mach_absolute_time() always returning a cycle count.
90+
*/
5791
int opal_timer_darwin_open(void)
5892
{
59-
mach_timebase_info_data_t sTBI;
60-
61-
mach_timebase_info(&sTBI);
62-
63-
/* mach_timebase_info() returns a fraction that can be multiplied
64-
by the difference between two calls to mach_absolute_time() to
65-
get the number of nanoseconds that passed between the two
66-
calls.
67-
68-
On PPC, mach_timebase_info returns numer = 1000000000 and denom
69-
= 33333335 (or possibly 25000000, depending on the machine).
70-
mach_absolute_time() returns a cycle count from the global
71-
clock, which runs at 25 - 33MHz, so dividing the cycle count by
72-
the frequency gives you seconds between the interval, then
73-
multiplying by 1000000000 gives you nanoseconds. Of course,
74-
you should do the multiply first, then the divide to reduce
75-
arithmetic errors due to integer math. But since we want the
76-
least amount of math in the critical path as possible and
77-
mach_absolute_time is already a cycle counter, we claim we have
78-
native cycle count support and set the frequencey to be the
79-
frequencey of the global clock, which is sTBI.denom *
80-
(1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or
81-
sTBI.denom.
82-
83-
On Intel, mach_timebase_info returns numer = 1 and denom = 1,
84-
meaning that mach_absolute_time() returns some global clock
85-
time in nanoseconds. Because PPC returns a frequency and
86-
returning a time in microseconds would still require math in
87-
the critical path (a divide, at that), we pretend that the
88-
nanosecond timer is instead a cycle counter for a 1GHz clock
89-
and that we're returning a cycle count natively. so sTBI.denom
90-
* (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or
91-
1000000000, meaning we have a 1GHz clock.
92-
93-
More generally, since mach_timebase_info() gives the "keys" to
94-
transition the return from mach_absolute_time() into
95-
nanoseconds, taking the reverse of that and multiplying by
96-
1000000000 will give you a frequency in cycles / second if you
97-
think of mach_absolute_time() always returning a cycle count.
93+
/* Call the opal_timer_base_get_cycles once to start the engine */
94+
(void)opal_timer_base_get_cycles();
9895

99-
By the way, it's interesting to note that because these are
100-
library functions and because of how rosetta works, a PPC
101-
binary running under rosetta on an Intel Mac will behave
102-
exactly like an Intel binary running on an Intel Mac.
103-
*/
104-
opal_timer_darwin_freq = sTBI.denom * (1000000000 / sTBI.numer);
96+
opal_timer_darwin_freq = opal_timer_darwin_info.denom * (1000000000 / opal_timer_darwin_info.numer);
10597

10698
return OPAL_SUCCESS;
10799
}

opal/mca/timer/linux/timer_linux.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -44,7 +44,7 @@ opal_timer_base_get_usec(void)
4444
#else
4545
return 0;
4646
#endif
47-
}
47+
}
4848

4949

5050
static inline opal_timer_t
@@ -56,7 +56,7 @@ opal_timer_base_get_freq(void)
5656

5757
#define OPAL_TIMER_CYCLE_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES
5858
#define OPAL_TIMER_CYCLE_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES
59-
#define OPAL_TIMER_USEC_NATIVE 0
59+
#define OPAL_TIMER_USEC_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES
6060
#define OPAL_TIMER_USEC_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES
6161

6262
#endif

opal/mca/timer/solaris/timer_solaris.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2005 The University of Tennessee and The University
5+
* Copyright (c) 2004-2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -35,7 +35,7 @@ opal_timer_base_get_usec(void)
3535
{
3636
/* gethrtime returns nanoseconds */
3737
return gethrtime() / 1000;
38-
}
38+
}
3939

4040
static inline opal_timer_t
4141
opal_timer_base_get_freq(void)

0 commit comments

Comments
 (0)