diff --git a/doc/src/sgml/ref/pgtesttiming.sgml b/doc/src/sgml/ref/pgtesttiming.sgml
index a5eb3aa25e02..7e0266cf58b7 100644
--- a/doc/src/sgml/ref/pgtesttiming.sgml
+++ b/doc/src/sgml/ref/pgtesttiming.sgml
@@ -93,28 +93,34 @@ PostgreSQL documentation
Good results will show most (>90%) individual timing calls take less than
- one microsecond. Average per loop overhead will be even lower, below 100
- nanoseconds. This example from an Intel i7-860 system using a TSC clock
- source shows excellent performance:
+ one microsecond (1000 nanoseconds). Average per loop overhead will be even
+ lower, below 100 nanoseconds. This example from an Intel i9-9880H system
+ using a TSC clock source shows excellent performance:
- Note that different units are used for the per loop time than the
- histogram. The loop can have resolution within a few nanoseconds (ns),
- while the individual timing calls can only resolve down to one microsecond
- (us).
+ Note that the accuracy of the histogram entries may be lower than the
+ per loop time.
@@ -125,24 +131,25 @@ Histogram of timing durations:
When the query executor is running a statement using
EXPLAIN ANALYZE, individual operations are timed as well
as showing a summary. The overhead of your system can be checked by
- counting rows with the psql program:
+ disabling the per-row timing, using the TIMING OFF
+ option:
-CREATE TABLE t AS SELECT * FROM generate_series(1,100000);
-\timing
-SELECT COUNT(*) FROM t;
-EXPLAIN ANALYZE SELECT COUNT(*) FROM t;
+CREATE TABLE t AS SELECT * FROM generate_series(1, 100000);
+EXPLAIN (ANALYZE, TIMING OFF) SELECT COUNT(*) FROM t;
+EXPLAIN (ANALYZE, TIMING ON) SELECT COUNT(*) FROM t;
- The i7-860 system measured runs the count query in 9.8 ms while
- the EXPLAIN ANALYZE version takes 16.6 ms, each
- processing just over 100,000 rows. That 6.8 ms difference means the timing
- overhead per row is 68 ns, about twice what pg_test_timing estimated it
- would be. Even that relatively small amount of overhead is making the fully
- timed count statement take almost 70% longer. On more substantial queries,
- the timing overhead would be less problematic.
+ The i9-9880H system measured shows an execution time of 4.116 ms for the
+ TIMING OFF query, and 6.965 ms for the
+ TIMING ON, each processing 100,000 rows.
+
+ That 2.849 ms difference means the timing overhead per row is 28 ns. As
+ TIMING ON measures timestamps twice per row returned by
+ an executor node, the overhead is very close to what pg_test_timing
+ estimated it would be.
@@ -157,28 +164,31 @@ EXPLAIN ANALYZE SELECT COUNT(*) FROM t;
/sys/devices/system/clocksource/clocksource0/current_clocksource
# pg_test_timing
-Per loop time including overhead: 722.92 ns
+Testing timing overhead for 3 seconds.
+Per loop time including overhead: 708.58 ns
Histogram of timing durations:
- < us % of total count
- 1 27.84870 1155682
- 2 72.05956 2990371
- 4 0.07810 3241
- 8 0.01357 563
- 16 0.00007 3
+ < ns % of total count
+ 1024 99.79796 4225270
+ 2048 0.15560 6588
+ 4096 0.00035 15
+ 8192 0.01738 736
+ 16384 0.01679 711
+ 32768 0.01190 504
]]>
In this configuration, the sample EXPLAIN ANALYZE above
- takes 115.9 ms. That's 1061 ns of timing overhead, again a small multiple
- of what's measured directly by this utility. That much timing overhead
- means the actual query itself is only taking a tiny fraction of the
- accounted for time, most of it is being consumed in overhead instead. In
- this configuration, any EXPLAIN ANALYZE totals involving
- many timed operations would be inflated significantly by timing overhead.
+ shows an execution time of 148.7 ms. That's 1392 ns of per-row timing
+ overhead. Taking the two timestamps per row into account, that's again
+ close to what pg_test_timing estimated. That much timing overhead means
+ the actual query itself is only taking a tiny fraction of the accounted for
+ time, most of it is being consumed in overhead instead. In this
+ configuration, any EXPLAIN ANALYZE totals involving many
+ timed operations would be inflated significantly by timing overhead.
@@ -196,33 +206,6 @@ kern.timecounter.hardware: ACPI-fast -> TSC
-
- Other systems may only allow setting the time source on boot. On older
- Linux systems the "clock" kernel setting is the only way to make this sort
- of change. And even on some more recent ones, the only option you'll see
- for a clock source is "jiffies". Jiffies are the older Linux software clock
- implementation, which can have good resolution when it's backed by fast
- enough timing hardware, as in this example:
-
-
-
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 3b91d02605a0..a33308119519 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -3363,8 +3363,7 @@ count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
INSTR_TIME_SET_CURRENT(currenttime);
elapsed = currenttime;
INSTR_TIME_SUBTRACT(elapsed, starttime);
- if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
- >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
+ if (INSTR_TIME_GET_MILLISEC(elapsed) >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
{
if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
{
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 56e635f47000..01f67c5d9725 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -67,9 +67,13 @@ InstrInit(Instrumentation *instr, int instrument_options)
void
InstrStartNode(Instrumentation *instr)
{
- if (instr->need_timer &&
- !INSTR_TIME_SET_CURRENT_LAZY(instr->starttime))
- elog(ERROR, "InstrStartNode called twice in a row");
+ if (instr->need_timer)
+ {
+ if (!INSTR_TIME_IS_ZERO(instr->starttime))
+ elog(ERROR, "InstrStartNode called twice in a row");
+ else
+ INSTR_TIME_SET_CURRENT_FAST(instr->starttime);
+ }
/* save buffer usage totals at node entry, if needed */
if (instr->need_bufusage)
@@ -95,7 +99,7 @@ InstrStopNode(Instrumentation *instr, double nTuples)
if (INSTR_TIME_IS_ZERO(instr->starttime))
elog(ERROR, "InstrStopNode called without start");
- INSTR_TIME_SET_CURRENT(endtime);
+ INSTR_TIME_SET_CURRENT_FAST(endtime);
INSTR_TIME_ACCUM_DIFF(instr->counter, endtime, instr->starttime);
INSTR_TIME_SET_ZERO(instr->starttime);
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 318600d6d02e..2e429b30a277 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -786,6 +786,9 @@ InitPostgres(const char *in_dbname, Oid dboid,
/* Initialize portal manager */
EnablePortalManager();
+ /* initialize high-precision interval timing */
+ INSTR_TIME_INITIALIZE();
+
/* Initialize status reporting */
pgstat_beinit();
diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c
index ce7aad4b25a4..62a15465f7c0 100644
--- a/src/bin/pg_test_timing/pg_test_timing.c
+++ b/src/bin/pg_test_timing/pg_test_timing.c
@@ -19,8 +19,8 @@ static void handle_args(int argc, char *argv[]);
static uint64 test_timing(unsigned int duration);
static void output(uint64 loop_count);
-/* record duration in powers of 2 microseconds */
-static long long int histogram[32];
+/* record duration in powers of 2 nanoseconds */
+static uint64 histogram[64];
int
main(int argc, char *argv[])
@@ -121,35 +121,49 @@ handle_args(int argc, char *argv[])
static uint64
test_timing(unsigned int duration)
{
- uint64 total_time;
- int64 time_elapsed = 0;
uint64 loop_count = 0;
- uint64 prev,
- cur;
+ instr_time until_time,
+ total_time;
instr_time start_time,
- end_time,
- temp;
+ end_time;
+ instr_time cur;
- total_time = duration > 0 ? duration * INT64CONST(1000000) : 0;
+ INSTR_TIME_INITIALIZE();
+ INSTR_TIME_SET_CURRENT_FAST(start_time);
- INSTR_TIME_SET_CURRENT(start_time);
- cur = INSTR_TIME_GET_MICROSEC(start_time);
+ /*
+ * To reduce loop overhead, check loop condition in instr_time domain.
+ */
+ INSTR_TIME_SET_SECONDS(total_time, duration);
+ until_time = start_time;
+ INSTR_TIME_ADD(until_time, total_time);
- while (time_elapsed < total_time)
+ cur = start_time;
+
+ while (INSTR_TIME_IS_LT(cur, until_time))
{
- int32 diff,
- bits = 0;
+ instr_time temp;
+ instr_time prev;
+ int64 diff;
+ int32 bits = 0;
prev = cur;
- INSTR_TIME_SET_CURRENT(temp);
- cur = INSTR_TIME_GET_MICROSEC(temp);
- diff = cur - prev;
+ INSTR_TIME_SET_CURRENT_FAST(cur);
+ temp = cur;
+ INSTR_TIME_SUBTRACT(temp, prev);
+ diff = INSTR_TIME_GET_NANOSEC(temp);
/* Did time go backwards? */
- if (diff < 0)
+ if (unlikely(diff <= 0))
{
+ /* can't do anything with that measurement */
+ if (diff == 0)
+ {
+ loop_count++;
+ continue;
+ }
fprintf(stderr, _("Detected clock going backwards in time.\n"));
- fprintf(stderr, _("Time warp: %d ms\n"), diff);
+ fprintf(stderr, _("Time warp: %lld ns\n"), (long long) diff);
exit(1);
}
@@ -164,16 +178,14 @@ test_timing(unsigned int duration)
histogram[bits]++;
loop_count++;
- INSTR_TIME_SUBTRACT(temp, start_time);
- time_elapsed = INSTR_TIME_GET_MICROSEC(temp);
}
- INSTR_TIME_SET_CURRENT(end_time);
+ INSTR_TIME_SET_CURRENT_FAST(end_time);
INSTR_TIME_SUBTRACT(end_time, start_time);
printf(_("Per loop time including overhead: %0.2f ns\n"),
- INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count);
+ (INSTR_TIME_GET_DOUBLE(end_time) * NS_PER_S) / loop_count);
return loop_count;
}
@@ -181,9 +193,10 @@ test_timing(unsigned int duration)
static void
output(uint64 loop_count)
{
- int64 max_bit = 31,
+ int64 low_bit = 0,
+ max_bit = 63,
i;
- char *header1 = _("< us");
+ char *header1 = _("< ns");
char *header2 = /* xgettext:no-c-format */ _("% of total");
char *header3 = _("count");
int len1 = strlen(header1);
@@ -194,15 +207,19 @@ output(uint64 loop_count)
while (max_bit > 0 && histogram[max_bit] == 0)
max_bit--;
+ /* find lowest bit value */
+ while (low_bit < max_bit && histogram[low_bit] == 0)
+ low_bit++;
+
printf(_("Histogram of timing durations:\n"));
printf("%*s %*s %*s\n",
- Max(6, len1), header1,
+ Max(9, len1), header1,
Max(10, len2), header2,
Max(10, len3), header3);
- for (i = 0; i <= max_bit; i++)
- printf("%*ld %*.5f %*lld\n",
- Max(6, len1), 1l << i,
+ for (i = low_bit; i <= max_bit; i++)
+		printf("%*llu %*.5f %*llu\n",
+			   Max(9, len1), (long long unsigned) (UINT64CONST(1) << i),
Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count,
- Max(10, len3), histogram[i]);
+ Max(10, len3), (long long unsigned) histogram[i]);
}
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index bf099aab2787..716f90578f0e 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -7265,6 +7265,9 @@ main(int argc, char **argv)
initRandomState(&state[i].cs_func_rs);
}
+ /* initialize high-precision interval timing */
+ INSTR_TIME_INITIALIZE();
+
/* opening connection... */
con = doConnect();
if (con == NULL)
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c
index 5018eedf1e57..789a2aa67635 100644
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -24,6 +24,7 @@
#include "help.h"
#include "input.h"
#include "mainloop.h"
+#include "portability/instr_time.h"
#include "settings.h"
/*
@@ -327,6 +328,9 @@ main(int argc, char *argv[])
PQsetNoticeProcessor(pset.db, NoticeProcessor, NULL);
+ /* initialize high-precision interval timing */
+ INSTR_TIME_INITIALIZE();
+
SyncVariables();
if (options.list_dbs)
diff --git a/src/common/Makefile b/src/common/Makefile
index 1e2b91c83c4c..194e45e7ae8d 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -59,6 +59,7 @@ OBJS_COMMON = \
file_perm.o \
file_utils.o \
hashfn.o \
+ instr_time.o \
ip.o \
jsonapi.o \
keywords.o \
diff --git a/src/common/instr_time.c b/src/common/instr_time.c
new file mode 100644
index 000000000000..9b18e6e3588d
--- /dev/null
+++ b/src/common/instr_time.c
@@ -0,0 +1,170 @@
+/*-------------------------------------------------------------------------
+ *
+ * instr_time.c
+ * Non-inline parts of the portable high-precision interval timing
+ * implementation
+ *
+ * Portions Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/common/instr_time.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "portability/instr_time.h"
+
+#ifndef WIN32
+/*
+ * Stores what the number of cycles needs to be multiplied with to end up
+ * with nanoseconds using integer math. See comment in pg_initialize_rdtsc()
+ * for more details.
+ *
+ * By default assume we are using clock_gettime() as a fallback which uses
+ * nanoseconds as ticks. Hence, we set the multiplier to the precision scalar
+ * so that the division in INSTR_TIME_GET_NANOSEC() won't change the nanoseconds.
+ *
+ * When using the RDTSC instruction directly this is filled in during initialization
+ * based on the relevant CPUID fields.
+ */
+int64 ticks_per_ns_scaled = TICKS_TO_NS_PRECISION;
+int64 ticks_per_sec = NS_PER_S;
+int64 max_ticks_no_overflow = PG_INT64_MAX / TICKS_TO_NS_PRECISION;
+
+#if defined(__x86_64__) && defined(__linux__)
+/*
+ * Indicates if RDTSC can be used (Linux/x86 only, when OS uses TSC clocksource)
+ */
+bool has_rdtsc = false;
+
+/*
+ * Indicates if RDTSCP can be used. True if RDTSC can be used and RDTSCP is available.
+ */
+bool has_rdtscp = false;
+
+#define CPUID_HYPERVISOR_VMWARE(words) (words[1] == 0x61774d56 && words[2] == 0x4d566572 && words[3] == 0x65726177) /* VMwareVMware */
+#define CPUID_HYPERVISOR_KVM(words) (words[1] == 0x4b4d564b && words[2] == 0x564b4d56 && words[3] == 0x0000004d) /* KVMKVMKVM */
+
+static bool
+get_tsc_frequency_khz(uint32 *tsc_freq)
+{
+ uint32 r[4];
+
+ if (__get_cpuid(0x15, &r[0] /* denominator */ , &r[1] /* numerator */ , &r[2] /* hz */ , &r[3]) && r[2] > 0)
+ {
+ if (r[0] == 0 || r[1] == 0)
+ return false;
+
+ *tsc_freq = r[2] / 1000 * r[1] / r[0];
+ return true;
+ }
+
+ /* Some CPUs only report frequency in 16H */
+ if (__get_cpuid(0x16, &r[0] /* base_mhz */ , &r[1], &r[2], &r[3]))
+ {
+ *tsc_freq = r[0] * 1000;
+ return true;
+ }
+
+ /*
+ * Check if we have a KVM or VMware Hypervisor passing down TSC frequency
+ * to us in a guest VM
+ *
+ * Note that accessing the 0x40000000 leaf for Hypervisor info requires
+ * use of __cpuidex to set ECX to 0.
+ *
+ * TODO: We need to check whether our compiler is new enough
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95973)
+ */
+ __cpuidex((int32 *) r, 0x40000000, 0);
+ if (r[0] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(r) || CPUID_HYPERVISOR_KVM(r)))
+ {
+ __cpuidex((int32 *) r, 0x40000010, 0);
+ if (r[0] > 0)
+ {
+ *tsc_freq = r[0];
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool
+is_rdtscp_available(void)
+{
+ uint32 r[4];
+
+ return __get_cpuid(0x80000001, &r[0], &r[1], &r[2], &r[3]) > 0 && (r[3] & (1 << 27)) != 0;
+}
+
+/*
+ * Decide whether we use the RDTSC instruction at runtime, for Linux/x86,
+ * instead of incurring the overhead of a full clock_gettime() call.
+ *
+ * This can't be reliably determined at compile time, since the
+ * availability of an "invariant" TSC (that is not affected by CPU
+ * frequency changes) is dependent on the CPU architecture. Additionally,
+ * there are cases where TSC availability is impacted by virtualization,
+ * where a simple cpuid feature check would not be enough.
+ *
+ * Since Linux already does a significant amount of work to determine
+ * whether TSC is a viable clock source, decide based on that.
+ */
+void
+pg_initialize_rdtsc(void)
+{
+ FILE *fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
+
+ if (fp)
+ {
+ char buf[128];
+
+ if (fgets(buf, sizeof(buf), fp) != NULL && strcmp(buf, "tsc\n") == 0)
+ {
+ /*
+			 * Compute baseline CPU performance, determines speed at which
+ * RDTSC advances.
+ */
+ uint32 tsc_freq;
+
+ if (get_tsc_frequency_khz(&tsc_freq))
+ {
+ /*
+ * Ticks to nanoseconds conversion requires floating point
+				 * math because:
+ *
+ * sec = ticks / frequency_hz ns = ticks / frequency_hz *
+ * 1,000,000,000 ns = ticks * (1,000,000,000 / frequency_hz)
+ * ns = ticks * (1,000,000 / frequency_khz) <-- now in
+ * kilohertz
+ *
+ * Here, 'ns' is usually a floating number. For example for a
+ * 2.5 GHz CPU the scaling factor becomes 1,000,000 /
+ * 2,500,000 = 1.2.
+ *
+ * To be able to use integer math we work around the lack of
+ * precision. We first scale the integer up and after the
+ * multiplication by the number of ticks in
+ * INSTR_TIME_GET_NANOSEC() we divide again by the same value.
+ * We picked the scaler such that it provides enough precision
+ * and is a power-of-two which allows for shifting instead of
+ * doing an integer division.
+ */
+ ticks_per_ns_scaled = INT64CONST(1000000) * TICKS_TO_NS_PRECISION / tsc_freq;
+ ticks_per_sec = tsc_freq * 1000; /* KHz->Hz */
+ max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled;
+
+ has_rdtsc = true;
+ has_rdtscp = is_rdtscp_available();
+ }
+ }
+
+ fclose(fp);
+ }
+}
+#endif /* defined(__x86_64__) && defined(__linux__) */
+
+#endif /* WIN32 */
diff --git a/src/common/meson.build b/src/common/meson.build
index 1540ba67cca4..62b90b3e609c 100644
--- a/src/common/meson.build
+++ b/src/common/meson.build
@@ -13,6 +13,7 @@ common_sources = files(
'file_perm.c',
'file_utils.c',
'hashfn.c',
+ 'instr_time.c',
'ip.c',
'jsonapi.c',
'keywords.c',
diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h
index f71a851b18d8..fd8ca1220012 100644
--- a/src/include/portability/instr_time.h
+++ b/src/include/portability/instr_time.h
@@ -4,9 +4,11 @@
* portable high-precision interval timing
*
* This file provides an abstraction layer to hide portability issues in
- * interval timing. On Unix we use clock_gettime(), and on Windows we use
- * QueryPerformanceCounter(). These macros also give some breathing room to
- * use other high-precision-timing APIs.
+ * interval timing. On Linux/x86 we use the rdtsc instruction when a TSC
+ * clocksource is also used on the host OS. Otherwise, and on other
+ * Unix-like systems we use clock_gettime() and on Windows we use
+ * QueryPerformanceCounter(). These macros also give some breathing
+ * room to use other high-precision-timing APIs.
*
* The basic data type is instr_time, which all callers should treat as an
* opaque typedef. instr_time can store either an absolute time (of
@@ -15,12 +17,17 @@
*
* INSTR_TIME_IS_ZERO(t) is t equal to zero?
*
+ * INSTR_TIME_IS_LT(x, y) x < y
+ *
* INSTR_TIME_SET_ZERO(t) set t to zero (memset is acceptable too)
*
- * INSTR_TIME_SET_CURRENT(t) set t to current time
+ * INSTR_TIME_SET_CURRENT_FAST(t) set t to current time without waiting
+ * for instructions in out-of-order window
+ *
+ * INSTR_TIME_SET_CURRENT(t) set t to current time while waiting for
+ * instructions in OOO to retire
*
- * INSTR_TIME_SET_CURRENT_LAZY(t) set t to current time if t is zero,
- * evaluates to whether t changed
+ * INSTR_TIME_SET_SECONDS(t, s) set t to s seconds
*
* INSTR_TIME_ADD(x, y) x += y
*
@@ -80,7 +87,15 @@ typedef struct instr_time
#ifndef WIN32
+/*
+ * Make sure this is a power-of-two, so that the compiler can turn the
+ * multiplications and divisions into shifts.
+ */
+#define TICKS_TO_NS_PRECISION (1<<14)
+extern int64 ticks_per_ns_scaled;
+extern int64 ticks_per_sec;
+extern int64 max_ticks_no_overflow;
/* Use clock_gettime() */
@@ -106,32 +121,124 @@ typedef struct instr_time
#define PG_INSTR_CLOCK CLOCK_REALTIME
#endif
-/* helper for INSTR_TIME_SET_CURRENT */
+#if defined(__x86_64__) && defined(__linux__)
+#include <cpuid.h>
+#include <x86intrin.h>
+
+extern bool has_rdtsc;
+extern bool has_rdtscp;
+
+extern void pg_initialize_rdtsc(void);
+#endif
+
static inline instr_time
-pg_clock_gettime_ns(void)
+pg_clock_gettime(void)
{
instr_time now;
struct timespec tmp;
clock_gettime(PG_INSTR_CLOCK, &tmp);
now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
-
return now;
}
+static inline instr_time
+pg_get_ticks_fast(void)
+{
+#if defined(__x86_64__) && defined(__linux__)
+ if (has_rdtsc)
+ {
+ instr_time now;
+
+ now.ticks = __rdtsc();
+ return now;
+ }
+#endif
+
+ return pg_clock_gettime();
+}
+
+static inline instr_time
+pg_get_ticks(void)
+{
+#if defined(__x86_64__) && defined(__linux__)
+ if (has_rdtscp)
+ {
+ instr_time now;
+ uint32 unused;
+
+ now.ticks = __rdtscp(&unused);
+ return now;
+ }
+#endif
+
+ return pg_clock_gettime();
+}
+
+static inline int64
+pg_ticks_to_ns(instr_time t)
+{
+ /*
+ * Would multiplication overflow? If so perform computation in two parts.
+ * Check overflow without actually overflowing via: a * b > max <=> a >
+ * max / b
+ */
+ int64 ns = 0;
+
+ if (unlikely(t.ticks > max_ticks_no_overflow))
+ {
+ /*
+ * Compute how often the maximum number of ticks fits completely into
+ * the number of elapsed ticks and convert that number into
+ * nanoseconds. Then multiply by the count to arrive at the final
+ * value. In a 2nd step we adjust the number of elapsed ticks and
+ * convert the remaining ticks.
+ */
+ int64 count = t.ticks / max_ticks_no_overflow;
+ int64 max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
+
+ ns = max_ns * count;
+
+ /*
+ * Subtract the ticks that we now already accounted for, so that they
+ * don't get counted twice.
+ */
+ t.ticks -= count * max_ticks_no_overflow;
+ Assert(t.ticks >= 0);
+ }
+
+ ns += t.ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
+ return ns;
+}
+
+static inline void
+pg_initialize_get_ticks(void)
+{
+#if defined(__x86_64__) && defined(__linux__)
+ pg_initialize_rdtsc();
+#endif
+}
+
+#define INSTR_TIME_INITIALIZE() \
+ pg_initialize_get_ticks()
+
+#define INSTR_TIME_SET_CURRENT_FAST(t) \
+ ((t) = pg_get_ticks_fast())
+
#define INSTR_TIME_SET_CURRENT(t) \
- ((t) = pg_clock_gettime_ns())
+ ((t) = pg_get_ticks())
-#define INSTR_TIME_GET_NANOSEC(t) \
- ((int64) (t).ticks)
+#define INSTR_TIME_SET_SECONDS(t, s) \
+ ((t).ticks = (s) * ticks_per_sec)
+#define INSTR_TIME_GET_NANOSEC(t) \
+ pg_ticks_to_ns(t)
#else /* WIN32 */
/* Use QueryPerformanceCounter() */
-/* helper for INSTR_TIME_SET_CURRENT */
static inline instr_time
pg_query_performance_counter(void)
{
@@ -153,9 +260,17 @@ GetTimerFrequency(void)
return (double) f.QuadPart;
}
+#define INSTR_TIME_INITIALIZE()
+
+#define INSTR_TIME_SET_CURRENT_FAST(t) \
+ ((t) = pg_query_performance_counter())
+
#define INSTR_TIME_SET_CURRENT(t) \
((t) = pg_query_performance_counter())
+#define INSTR_TIME_SET_SECONDS(t, s) \
+	((t).ticks = (s) * GetTimerFrequency())
+
#define INSTR_TIME_GET_NANOSEC(t) \
((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
@@ -168,13 +283,10 @@ GetTimerFrequency(void)
#define INSTR_TIME_IS_ZERO(t) ((t).ticks == 0)
+#define INSTR_TIME_IS_LT(x, y) ((x).ticks < (y).ticks)
#define INSTR_TIME_SET_ZERO(t) ((t).ticks = 0)
-#define INSTR_TIME_SET_CURRENT_LAZY(t) \
- (INSTR_TIME_IS_ZERO(t) ? INSTR_TIME_SET_CURRENT(t), true : false)
-
-
#define INSTR_TIME_ADD(x,y) \
((x).ticks += (y).ticks)
@@ -184,7 +296,6 @@ GetTimerFrequency(void)
#define INSTR_TIME_ACCUM_DIFF(x,y,z) \
((x).ticks += (y).ticks - (z).ticks)
-
#define INSTR_TIME_GET_DOUBLE(t) \
((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)