Scanner C++ API
cycle_timer.h
1 #ifndef _SYRAH_CYCLE_TIMER_H_
2 #define _SYRAH_CYCLE_TIMER_H_
3 
4 #if defined(__APPLE__)
5 #if defined(__x86_64__)
6 #include <sys/sysctl.h>
7 #else
8 #include <mach/mach.h>
9 #include <mach/mach_time.h>
10 #endif // __x86_64__ or not
11 
12 #include <stdio.h> // fprintf
13 #include <stdlib.h> // exit
14 
15 #elif _WIN32
16 #include <time.h>
17 #include <windows.h>
18 #else
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/time.h>
23 #endif
24 
25 namespace scanner {
26 
// Static-only timer built on the cheapest high-resolution counter
// available for the target:
//   - Apple, non-x86_64: mach_absolute_time()
//   - Windows:           QueryPerformanceCounter()
//   - x86_64:            the processor's cycle counter (rdtsc)
//   - everything else:   clock_gettime(CLOCK_THREAD_CPUTIME_ID), in ns
//
// Where the processor's cycle counter is used, different processors in
// the system may report different values. If your process moves across
// processors, the delta time you measure will likely be incorrect. This
// is mostly for fine grained measurements where the process is likely to
// stay on the same processor. For more global things you should use the
// Time interface.
//
// Also note that if your processors' speeds change (i.e. processor
// frequency scaling) or if you are in a heterogeneous environment, you
// will likely get spurious results.
class CycleTimer {
 public:
  typedef unsigned long long SysClock;

  // Return the current time, in terms of clock ticks.
  // Time zero is at some arbitrary point in the past. The meaning of a
  // tick depends on the platform; see tickUnits() and secondsPerTick().
  static SysClock currentTicks() {
#if defined(__APPLE__) && !defined(__x86_64__)
    return mach_absolute_time();
#elif defined(_WIN32)
    LARGE_INTEGER qwTime;
    QueryPerformanceCounter(&qwTime);
    return static_cast<SysClock>(qwTime.QuadPart);
#elif defined(__x86_64__)
    // rdtsc returns the 64-bit counter split across EDX:EAX.
    unsigned int a, d;
    asm volatile("rdtsc" : "=a"(a), "=d"(d));
    return static_cast<SysClock>(a) | (static_cast<SysClock>(d) << 32);
#elif defined(__ARM_NEON__) && 0  // mrc requires superuser.
    unsigned int val;
    asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
    return val;
#else
    timespec spec = timespec();
    if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec) != 0) return 0;
    // Combine in integer arithmetic: going through float would discard
    // most of the nanosecond resolution.
    return static_cast<SysClock>(spec.tv_sec) * 1000000000ULL +
           static_cast<SysClock>(spec.tv_nsec);
#endif
  }

  // Return the current time, in terms of seconds.
  // This is slower than currentTicks(). Time zero is at
  // some arbitrary point in the past.
  static double currentSeconds() { return currentTicks() * secondsPerTick(); }

  // Return the conversion from seconds to ticks.
  static double ticksPerSecond() { return 1.0 / secondsPerTick(); }

  // Return a short label for the unit of currentTicks() on this platform.
  static const char* tickUnits() {
#if defined(__APPLE__) && !defined(__x86_64__)
    return "ns";
#elif defined(_WIN32) || defined(__WIN32__) || defined(__x86_64__)
    return "cycles";
#else
    return "ns";  // clock_gettime
#endif
  }

  // Return the conversion from ticks to seconds.
  // The value is computed on the first call and cached in function-local
  // statics; the cache is not protected by any synchronization.
  // Exits the process if the platform query needed to compute it fails.
  static double secondsPerTick() {
    static bool initialized = false;
    static double secondsPerTick_val;
    if (initialized) return secondsPerTick_val;
#if defined(__APPLE__)
#ifdef __x86_64__
    int args[] = {CTL_HW, HW_CPU_FREQ};
    unsigned int Hz = 0;
    size_t len = sizeof(Hz);
    if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0 || Hz == 0) {
      fprintf(stderr, "Failed to initialize secondsPerTick_val!\n");
      exit(-1);
    }
    secondsPerTick_val = 1.0 / static_cast<double>(Hz);
#else
    mach_timebase_info_data_t time_info;
    mach_timebase_info(&time_info);

    // numer/denom converts ticks to nanoseconds; 1e-9 then gives seconds.
    secondsPerTick_val = (1e-9 * static_cast<double>(time_info.numer)) /
                         static_cast<double>(time_info.denom);
#endif  // x86_64 or not
#elif defined(_WIN32)
    LARGE_INTEGER qwTicksPerSec;
    QueryPerformanceFrequency(&qwTicksPerSec);
    secondsPerTick_val = 1.0 / static_cast<double>(qwTicksPerSec.QuadPart);
#elif defined(__x86_64__)
    // currentTicks() counts rdtsc cycles here, so the clock rate is
    // needed; read it from /proc/cpuinfo.
    FILE* fp = fopen("/proc/cpuinfo", "r");
    if (!fp) {
      fprintf(stderr,
              "CycleTimer::secondsPerTick failed: couldn't open "
              "/proc/cpuinfo.\n");
      exit(-1);
    }
    // Fallback (1 tick == 1 ns) in case no usable frequency is found.
    secondsPerTick_val = 1e-9;
    char input[1024];
    while (fgets(input, sizeof(input), fp)) {
      // NOTE(boulos): Because reading cpuinfo depends on dynamic
      // frequency scaling it's better to read the @ sign first
      double freq = 0.0;
      if (strstr(input, "model name")) {
        char* at_sign = strchr(input, '@');
        if (!at_sign) continue;
        char* after_at = at_sign + 1;
        char* GHz_str = strstr(after_at, "GHz");
        char* MHz_str = strstr(after_at, "MHz");
        if (GHz_str) {
          *GHz_str = '\0';  // cut the unit off so only the number remains
          if (1 == sscanf(after_at, "%lf", &freq) && freq > 0.0) {
            secondsPerTick_val = 1e-9 / freq;
            break;
          }
        } else if (MHz_str) {
          *MHz_str = '\0';
          if (1 == sscanf(after_at, "%lf", &freq) && freq > 0.0) {
            secondsPerTick_val = 1e-6 / freq;
            break;
          }
        }
      } else if (1 == sscanf(input, "cpu MHz : %lf", &freq) && freq > 0.0) {
        secondsPerTick_val = 1e-6 / freq;
        break;
      }
    }
    fclose(fp);
#else
    // currentTicks() reports clock_gettime nanoseconds on these targets,
    // so the scale is exact and independent of the CPU frequency.
    secondsPerTick_val = 1e-9;
#endif

    initialized = true;
    return secondsPerTick_val;
  }

  // Return the conversion from ticks to milliseconds.
  static double msPerTick() { return secondsPerTick() * 1000.0; }

 private:
  // Declared but not defined: the class is a collection of static
  // helpers and is not meant to be instantiated.
  CycleTimer();
};
173 }
174 
175 #endif // #ifndef _SYRAH_CYCLE_TIMER_H_
Definition: database.cpp:36
Definition: cycle_timer.h:38