diff --git a/doc/src/sgml/ref/pgtesttiming.sgml b/doc/src/sgml/ref/pgtesttiming.sgml
index a5eb3aa25e02..7e0266cf58b7 100644
--- a/doc/src/sgml/ref/pgtesttiming.sgml
+++ b/doc/src/sgml/ref/pgtesttiming.sgml
@@ -93,28 +93,34 @@ PostgreSQL documentation
    Good results will show most (>90%) individual timing calls take less than
-   one microsecond. Average per loop overhead will be even lower, below 100
-   nanoseconds. This example from an Intel i7-860 system using a TSC clock
-   source shows excellent performance:
+   one microsecond (1000 nanoseconds). Average per loop overhead will be even
+   lower, below 100 nanoseconds. This example from an Intel i9-9880H system
+   using a TSC clock source shows excellent performance:
 
-   Note that different units are used for the per loop time than the
-   histogram. The loop can have resolution within a few nanoseconds (ns),
-   while the individual timing calls can only resolve down to one microsecond
-   (us).
+   Note that the accuracy of the individual histogram entries may be lower
+   than that of the average per loop time.
 
@@ -125,24 +131,25 @@ Histogram of timing durations:
    When the query executor is running a statement using
    EXPLAIN ANALYZE, individual operations are timed as well
    as showing a summary. The overhead of your system can be checked by
-   counting rows with the psql program:
+   disabling the per-row timing with the TIMING OFF
+   option:
 
-CREATE TABLE t AS SELECT * FROM generate_series(1,100000);
-\timing
-SELECT COUNT(*) FROM t;
-EXPLAIN ANALYZE SELECT COUNT(*) FROM t;
+CREATE TABLE t AS SELECT * FROM generate_series(1, 100000);
+EXPLAIN (ANALYZE, TIMING OFF) SELECT COUNT(*) FROM t;
+EXPLAIN (ANALYZE, TIMING ON) SELECT COUNT(*) FROM t;
 
-   The i7-860 system measured runs the count query in 9.8 ms while
-   the EXPLAIN ANALYZE version takes 16.6 ms, each
-   processing just over 100,000 rows. That 6.8 ms difference means the timing
-   overhead per row is 68 ns, about twice what pg_test_timing estimated it
-   would be. Even that relatively small amount of overhead is making the fully
-   timed count statement take almost 70% longer. On more substantial queries,
-   the timing overhead would be less problematic.
+   The measured i9-9880H system shows an execution time of 4.116 ms for the
+   TIMING OFF query, and 6.965 ms for the
+   TIMING ON one, each processing 100,000 rows.
+
+   That 2.849 ms difference means the timing overhead per row is 28 ns. As
+   TIMING ON takes two timestamps per row returned by
+   an executor node, the overhead is very close to what pg_test_timing
+   estimated it would be.
@@ -157,28 +164,31 @@ EXPLAIN ANALYZE SELECT COUNT(*) FROM t;
 /sys/devices/system/clocksource/clocksource0/current_clocksource
 # pg_test_timing
-Per loop time including overhead: 722.92 ns
+Testing timing overhead for 3 seconds.
+Per loop time including overhead: 708.58 ns
 Histogram of timing durations:
-   < us   % of total      count
-      1     27.84870    1155682
-      2     72.05956    2990371
-      4      0.07810       3241
-      8      0.01357        563
-     16      0.00007          3
+      < ns   % of total      count
+      1024     99.79796    4225270
+      2048      0.15560       6588
+      4096      0.00035         15
+      8192      0.01738        736
+     16384      0.01679        711
+     32768      0.01190        504
 ]]>
 
    In this configuration, the sample EXPLAIN ANALYZE above
-   takes 115.9 ms. That's 1061 ns of timing overhead, again a small multiple
-   of what's measured directly by this utility. That much timing overhead
-   means the actual query itself is only taking a tiny fraction of the
-   accounted for time, most of it is being consumed in overhead instead. In
-   this configuration, any EXPLAIN ANALYZE totals involving
-   many timed operations would be inflated significantly by timing overhead.
+   shows an execution time of 148.7 ms. That's 1392 ns of per-row timing
+   overhead. Taking the two timestamps per row into account, that's again
+   close to what pg_test_timing estimated. That much timing overhead means
+   the actual query itself is only taking a tiny fraction of the accounted
+   for time, most of it is being consumed in overhead instead. In this
+   configuration, any EXPLAIN ANALYZE totals involving many
+   timed operations would be inflated significantly by timing overhead.
 
@@ -196,33 +206,6 @@ kern.timecounter.hardware: ACPI-fast -> TSC
-
-   Other systems may only allow setting the time source on boot. On older
-   Linux systems the "clock" kernel setting is the only way to make this sort
-   of change. And even on some more recent ones, the only option you'll see
-   for a clock source is "jiffies". Jiffies are the older Linux software clock
-   implementation, which can have good resolution when it's backed by fast
-   enough timing hardware, as in this example:
-
-
-
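The arithmetic quoted in the updated documentation is easy to re-derive. A minimal standalone sketch, not part of the patch; the constants are just the example measurements from the text above:

#include <stdio.h>

int
main(void)
{
    /* example measurements quoted in the updated documentation */
    const double timing_off_ms = 4.116;
    const double timing_on_ms = 6.965;
    const double rows = 100000.0;

    double      diff_ns = (timing_on_ms - timing_off_ms) * 1e6;    /* ms -> ns */
    double      per_row_ns = diff_ns / rows;

    /* TIMING ON takes two timestamps per row returned by an executor node */
    printf("overhead per row: %.1f ns (%.1f ns per timestamp)\n",
           per_row_ns, per_row_ns / 2.0);
    return 0;
}

This prints roughly 28.5 ns per row, i.e. about 14 ns per timestamp, which is consistent with the sub-microsecond buckets dominating the histogram above.
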
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 3b91d02605a0..a33308119519 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -3363,8 +3363,7 @@ count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
 			INSTR_TIME_SET_CURRENT(currenttime);
 			elapsed = currenttime;
 			INSTR_TIME_SUBTRACT(elapsed, starttime);
-			if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
-				>= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
+			if (INSTR_TIME_GET_MILLISEC(elapsed) >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
 			{
 				if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
 				{
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 56e635f47000..01f67c5d9725 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -67,9 +67,13 @@ InstrInit(Instrumentation *instr, int instrument_options)
 void
 InstrStartNode(Instrumentation *instr)
 {
-	if (instr->need_timer &&
-		!INSTR_TIME_SET_CURRENT_LAZY(instr->starttime))
-		elog(ERROR, "InstrStartNode called twice in a row");
+	if (instr->need_timer)
+	{
+		if (!INSTR_TIME_IS_ZERO(instr->starttime))
+			elog(ERROR, "InstrStartNode called twice in a row");
+		else
+			INSTR_TIME_SET_CURRENT_FAST(instr->starttime);
+	}
 
 	/* save buffer usage totals at node entry, if needed */
 	if (instr->need_bufusage)
@@ -95,7 +99,7 @@ InstrStopNode(Instrumentation *instr, double nTuples)
 		if (INSTR_TIME_IS_ZERO(instr->starttime))
 			elog(ERROR, "InstrStopNode called without start");
 
-		INSTR_TIME_SET_CURRENT(endtime);
+		INSTR_TIME_SET_CURRENT_FAST(endtime);
 		INSTR_TIME_ACCUM_DIFF(instr->counter, endtime, instr->starttime);
 
 		INSTR_TIME_SET_ZERO(instr->starttime);
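For context, InstrStartNode()/InstrStopNode() are driven from the executor's per-node wrapper; this is simplified from ExecProcNodeInstr() in execProcnode.c (error handling and other details elided):

static TupleTableSlot *
ExecProcNodeInstr(PlanState *node)
{
    TupleTableSlot *result;

    InstrStartNode(node->instrument);   /* first timestamp of the pair */

    result = node->ExecProcNodeReal(node);

    /* second timestamp; the difference accumulates into instr->counter */
    InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0);

    return result;
}

This wrapper is why EXPLAIN (ANALYZE, TIMING ON) costs two timestamp reads per row returned by each node, and why the cheaper INSTR_TIME_SET_CURRENT_FAST() is used on this hot path.
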
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 318600d6d02e..2e429b30a277 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -786,6 +786,9 @@ InitPostgres(const char *in_dbname, Oid dboid,
 	/* Initialize portal manager */
 	EnablePortalManager();
 
+	/* initialize high-precision interval timing */
+	INSTR_TIME_INITIALIZE();
+
 	/* Initialize status reporting */
 	pgstat_beinit();
 
diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c
index ce7aad4b25a4..62a15465f7c0 100644
--- a/src/bin/pg_test_timing/pg_test_timing.c
+++ b/src/bin/pg_test_timing/pg_test_timing.c
@@ -19,8 +19,8 @@
 static void handle_args(int argc, char *argv[]);
 static uint64 test_timing(unsigned int duration);
 static void output(uint64 loop_count);
 
-/* record duration in powers of 2 microseconds */
-static long long int histogram[32];
+/* record duration in powers of 2 nanoseconds */
+static uint64 histogram[64];
 
 int
 main(int argc, char *argv[])
@@ -121,35 +121,49 @@ handle_args(int argc, char *argv[])
 static uint64
 test_timing(unsigned int duration)
 {
-	uint64		total_time;
-	int64		time_elapsed = 0;
 	uint64		loop_count = 0;
-	uint64		prev,
-				cur;
+	instr_time	until_time,
+				total_time;
 	instr_time	start_time,
-				end_time,
-				temp;
+				end_time;
+	instr_time	cur;
 
-	total_time = duration > 0 ? duration * INT64CONST(1000000) : 0;
+	INSTR_TIME_INITIALIZE();
+	INSTR_TIME_SET_CURRENT_FAST(start_time);
 
-	INSTR_TIME_SET_CURRENT(start_time);
-	cur = INSTR_TIME_GET_MICROSEC(start_time);
+	/*
+	 * To reduce loop overhead, check loop condition in instr_time domain.
+	 */
+	INSTR_TIME_SET_SECONDS(total_time, duration);
+	until_time = start_time;
+	INSTR_TIME_ADD(until_time, total_time);
 
-	while (time_elapsed < total_time)
+	cur = start_time;
+
+	while (INSTR_TIME_IS_LT(cur, until_time))
 	{
-		int32		diff,
-					bits = 0;
+		instr_time	temp;
+		instr_time	prev;
+		int64		diff;
+		int32		bits = 0;
 
 		prev = cur;
-		INSTR_TIME_SET_CURRENT(temp);
-		cur = INSTR_TIME_GET_MICROSEC(temp);
-		diff = cur - prev;
+		INSTR_TIME_SET_CURRENT_FAST(cur);
+		temp = cur;
+		INSTR_TIME_SUBTRACT(temp, prev);
+		diff = INSTR_TIME_GET_NANOSEC(temp);
 
 		/* Did time go backwards? */
-		if (diff < 0)
+		if (unlikely(diff <= 0))
 		{
+			/* can't do anything with that measurement */
+			if (diff == 0)
+			{
+				loop_count++;
+				continue;
+			}
+
 			fprintf(stderr, _("Detected clock going backwards in time.\n"));
-			fprintf(stderr, _("Time warp: %d ms\n"), diff);
+			fprintf(stderr, _("Time warp: %lld ns\n"), (long long) diff);
 			exit(1);
 		}
@@ -164,16 +178,14 @@ test_timing(unsigned int duration)
 		histogram[bits]++;
 
 		loop_count++;
-		INSTR_TIME_SUBTRACT(temp, start_time);
-		time_elapsed = INSTR_TIME_GET_MICROSEC(temp);
 	}
 
-	INSTR_TIME_SET_CURRENT(end_time);
+	INSTR_TIME_SET_CURRENT_FAST(end_time);
 
 	INSTR_TIME_SUBTRACT(end_time, start_time);
 
 	printf(_("Per loop time including overhead: %0.2f ns\n"),
-		   INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count);
+		   (INSTR_TIME_GET_DOUBLE(end_time) * NS_PER_S) / loop_count);
 
 	return loop_count;
 }
@@ -181,9 +193,10 @@ test_timing(unsigned int duration)
 static void
 output(uint64 loop_count)
 {
-	int64		max_bit = 31,
+	int64		low_bit = 0,
+				max_bit = 63,
 				i;
-	char	   *header1 = _("< us");
+	char	   *header1 = _("< ns");
 	char	   *header2 = /* xgettext:no-c-format */ _("% of total");
 	char	   *header3 = _("count");
 	int			len1 = strlen(header1);
@@ -194,15 +207,19 @@ output(uint64 loop_count)
 	while (max_bit > 0 && histogram[max_bit] == 0)
 		max_bit--;
 
+	/* find lowest bit value */
+	while (low_bit < max_bit && histogram[low_bit] == 0)
+		low_bit++;
+
 	printf(_("Histogram of timing durations:\n"));
 	printf("%*s   %*s %*s\n",
-		   Max(6, len1), header1,
+		   Max(9, len1), header1,
 		   Max(10, len2), header2,
 		   Max(10, len3), header3);
 
-	for (i = 0; i <= max_bit; i++)
-		printf("%*ld    %*.5f %*lld\n",
-			   Max(6, len1), 1l << i,
+	for (i = low_bit; i <= max_bit; i++)
+		printf("%*ld    %*.5f %*llu\n",
+			   Max(9, len1), 1l << i,
 			   Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count,
-			   Max(10, len3), histogram[i]);
+			   Max(10, len3), (long long unsigned) histogram[i]);
 }
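The bucketing step itself falls between the two test_timing() hunks above and is unchanged in spirit: the histogram index is the position of the highest set bit of the measured difference, which is now in nanoseconds. A sketch of that logic (variable names match the surrounding code):

/*
 * Sketch of the bucket computation: a diff of d nanoseconds lands in
 * histogram[b], where b is the number of significant bits in d. The row
 * labeled "< 1024" therefore collects diffs in [512, 1023], and so on.
 */
static int
duration_bits(int64 diff)
{
    int         bits = 0;

    while (diff)
    {
        diff >>= 1;
        bits++;
    }
    return bits;                /* histogram[bits]++ in the caller */
}
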
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index bf099aab2787..716f90578f0e 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -7265,6 +7265,9 @@ main(int argc, char **argv)
 		initRandomState(&state[i].cs_func_rs);
 	}
 
+	/* initialize high-precision interval timing */
+	INSTR_TIME_INITIALIZE();
+
 	/* opening connection... */
 	con = doConnect();
 	if (con == NULL)
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c
index 5018eedf1e57..789a2aa67635 100644
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -24,6 +24,7 @@
 #include "help.h"
 #include "input.h"
 #include "mainloop.h"
+#include "portability/instr_time.h"
 #include "settings.h"
 
 /*
@@ -327,6 +328,9 @@ main(int argc, char *argv[])
 
 	PQsetNoticeProcessor(pset.db, NoticeProcessor, NULL);
 
+	/* initialize high-precision interval timing */
+	INSTR_TIME_INITIALIZE();
+
 	SyncVariables();
 
 	if (options.list_dbs)
diff --git a/src/common/Makefile b/src/common/Makefile
index 1e2b91c83c4c..194e45e7ae8d 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -59,6 +59,7 @@ OBJS_COMMON = \
 	file_perm.o \
 	file_utils.o \
 	hashfn.o \
+	instr_time.o \
 	ip.o \
 	jsonapi.o \
 	keywords.o \
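All the call sites above follow the same rule: INSTR_TIME_INITIALIZE() runs once, early in main() and before the first timestamp, since it fills in the RDTSC availability flags and conversion factors (without it the code safely falls back to clock_gettime()). A minimal sketch of the pattern for a hypothetical frontend program, assuming the usual frontend headers:

#include "postgres_fe.h"

#include "portability/instr_time.h"

int
main(int argc, char *argv[])
{
    instr_time  start,
                elapsed;

    /* fill in RDTSC availability and conversion factors before first use */
    INSTR_TIME_INITIALIZE();

    INSTR_TIME_SET_CURRENT_FAST(start);
    /* ... the work being measured ... */
    INSTR_TIME_SET_CURRENT_FAST(elapsed);
    INSTR_TIME_SUBTRACT(elapsed, start);

    printf("elapsed: %0.3f ms\n", INSTR_TIME_GET_MILLISEC(elapsed));
    return 0;
}
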
diff --git a/src/common/instr_time.c b/src/common/instr_time.c
new file mode 100644
index 000000000000..9b18e6e3588d
--- /dev/null
+++ b/src/common/instr_time.c
@@ -0,0 +1,170 @@
+/*-------------------------------------------------------------------------
+ *
+ * instr_time.c
+ *	  Non-inline parts of the portable high-precision interval timing
+ *	  implementation
+ *
+ * Portions Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/common/instr_time.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "portability/instr_time.h"
+
+#ifndef WIN32
+/*
+ * Stores what the number of cycles needs to be multiplied with to end up
+ * with nanoseconds using integer math. See comment in pg_initialize_rdtsc()
+ * for more details.
+ *
+ * By default assume we are using clock_gettime() as a fallback, which uses
+ * nanoseconds as ticks. Hence, we set the multiplier to the precision scalar
+ * so that the division in INSTR_TIME_GET_NANOSEC() won't change the
+ * nanoseconds.
+ *
+ * When using the RDTSC instruction directly this is filled in during
+ * initialization based on the relevant CPUID fields.
+ */
+int64		ticks_per_ns_scaled = TICKS_TO_NS_PRECISION;
+int64		ticks_per_sec = NS_PER_S;
+int64		max_ticks_no_overflow = PG_INT64_MAX / TICKS_TO_NS_PRECISION;
+
+#if defined(__x86_64__) && defined(__linux__)
+/*
+ * Indicates if RDTSC can be used (Linux/x86 only, when OS uses TSC clocksource)
+ */
+bool		has_rdtsc = false;
+
+/*
+ * Indicates if RDTSCP can be used. True if RDTSC can be used and RDTSCP is
+ * available.
+ */
+bool		has_rdtscp = false;
+
+#define CPUID_HYPERVISOR_VMWARE(words) (words[1] == 0x61774d56 && words[2] == 0x4d566572 && words[3] == 0x65726177)	/* VMwareVMware */
+#define CPUID_HYPERVISOR_KVM(words) (words[1] == 0x4b4d564b && words[2] == 0x564b4d56 && words[3] == 0x0000004d)	/* KVMKVMKVM */
+
+static bool
+get_tsc_frequency_khz(uint32 *tsc_freq)
+{
+	uint32		r[4];
+
+	if (__get_cpuid(0x15, &r[0] /* denominator */ , &r[1] /* numerator */ , &r[2] /* hz */ , &r[3]) && r[2] > 0)
+	{
+		if (r[0] == 0 || r[1] == 0)
+			return false;
+
+		*tsc_freq = r[2] / 1000 * r[1] / r[0];
+		return true;
+	}
+
+	/* Some CPUs only report frequency in 16H */
+	if (__get_cpuid(0x16, &r[0] /* base_mhz */ , &r[1], &r[2], &r[3]))
+	{
+		*tsc_freq = r[0] * 1000;
+		return true;
+	}
+
+	/*
+	 * Check if we have a KVM or VMware hypervisor passing down the TSC
+	 * frequency to us in a guest VM.
+	 *
+	 * Note that accessing the 0x40000000 leaf for hypervisor info requires
+	 * use of __cpuidex to set ECX to 0.
+	 *
+	 * TODO: We need to check whether our compiler is new enough
+	 * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95973)
+	 */
+	__cpuidex((int32 *) r, 0x40000000, 0);
+	if (r[0] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(r) || CPUID_HYPERVISOR_KVM(r)))
+	{
+		__cpuidex((int32 *) r, 0x40000010, 0);
+		if (r[0] > 0)
+		{
+			*tsc_freq = r[0];
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool
+is_rdtscp_available(void)
+{
+	uint32		r[4];
+
+	return __get_cpuid(0x80000001, &r[0], &r[1], &r[2], &r[3]) > 0 &&
+		(r[3] & (1 << 27)) != 0;
+}
+
+/*
+ * Decide whether we use the RDTSC instruction at runtime, for Linux/x86,
+ * instead of incurring the overhead of a full clock_gettime() call.
+ *
+ * This can't be reliably determined at compile time, since the
+ * availability of an "invariant" TSC (that is not affected by CPU
+ * frequency changes) is dependent on the CPU architecture. Additionally,
+ * there are cases where TSC availability is impacted by virtualization,
+ * where a simple cpuid feature check would not be enough.
+ *
+ * Since Linux already does a significant amount of work to determine
+ * whether TSC is a viable clock source, decide based on that.
+ */
+void
+pg_initialize_rdtsc(void)
+{
+	FILE	   *fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
+
+	if (fp)
+	{
+		char		buf[128];
+
+		if (fgets(buf, sizeof(buf), fp) != NULL && strcmp(buf, "tsc\n") == 0)
+		{
+			/*
+			 * Compute the baseline CPU frequency, which determines the
+			 * speed at which RDTSC advances.
+			 */
+			uint32		tsc_freq;
+
+			if (get_tsc_frequency_khz(&tsc_freq))
+			{
+				/*
+				 * The ticks to nanoseconds conversion requires floating
+				 * point math because:
+				 *
+				 * sec = ticks / frequency_hz
+				 * ns  = ticks / frequency_hz * 1,000,000,000
+				 * ns  = ticks * (1,000,000,000 / frequency_hz)
+				 * ns  = ticks * (1,000,000 / frequency_khz) <-- now in kilohertz
+				 *
+				 * Here, the scaling factor is usually not an integer. For
+				 * example, for a 2.5 GHz CPU it becomes 1,000,000 /
+				 * 2,500,000 = 0.4.
+				 *
+				 * To be able to use integer math we work around the lack of
+				 * precision: we first scale the factor up, and after the
+				 * multiplication by the number of ticks in
+				 * INSTR_TIME_GET_NANOSEC() we divide again by the same
+				 * value. We picked the scaler such that it provides enough
+				 * precision and is a power-of-two, which allows for shifting
+				 * instead of doing an integer division.
+				 */
+				ticks_per_ns_scaled = INT64CONST(1000000) * TICKS_TO_NS_PRECISION / tsc_freq;
+				ticks_per_sec = tsc_freq * 1000;	/* KHz->Hz */
+				max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled;
+
+				has_rdtsc = true;
+				has_rdtscp = is_rdtscp_available();
+			}
+		}
+
+		fclose(fp);
+	}
+}
+#endif							/* defined(__x86_64__) && defined(__linux__) */
+
+#endif							/* WIN32 */
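To make the leaf-0x15 arithmetic concrete, here is a standalone sketch with illustrative values (a 38.4 MHz crystal with a 188/2 ratio is plausible for recent Intel parts; the numbers are assumptions, not measurements):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* hypothetical CPUID 0x15 output: EAX=denominator, EBX=numerator, ECX=crystal Hz */
    uint32_t    denominator = 2;
    uint32_t    numerator = 188;
    uint32_t    crystal_hz = 38400000;  /* 38.4 MHz */

    /* same integer expression as get_tsc_frequency_khz() */
    uint32_t    tsc_freq = crystal_hz / 1000 * numerator / denominator;

    printf("TSC frequency: %u kHz (%.4f GHz)\n", tsc_freq, tsc_freq / 1e6);
    return 0;
}

With these inputs the result is 3,609,600 kHz, i.e. a 3.6096 GHz TSC. Dividing by 1000 first keeps the intermediate products within 32 bits.
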
diff --git a/src/common/meson.build b/src/common/meson.build
index 1540ba67cca4..62b90b3e609c 100644
--- a/src/common/meson.build
+++ b/src/common/meson.build
@@ -13,6 +13,7 @@ common_sources = files(
   'file_perm.c',
   'file_utils.c',
   'hashfn.c',
+  'instr_time.c',
   'ip.c',
   'jsonapi.c',
   'keywords.c',
diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h
index f71a851b18d8..fd8ca1220012 100644
--- a/src/include/portability/instr_time.h
+++ b/src/include/portability/instr_time.h
@@ -4,9 +4,11 @@
  *	  portable high-precision interval timing
  *
  * This file provides an abstraction layer to hide portability issues in
- * interval timing. On Unix we use clock_gettime(), and on Windows we use
- * QueryPerformanceCounter(). These macros also give some breathing room to
- * use other high-precision-timing APIs.
+ * interval timing. On Linux/x86 we use the rdtsc instruction when a TSC
+ * clocksource is also used on the host OS. Otherwise, and on other
+ * Unix-like systems, we use clock_gettime(), and on Windows we use
+ * QueryPerformanceCounter(). These macros also give some breathing
+ * room to use other high-precision-timing APIs.
 *
 * The basic data type is instr_time, which all callers should treat as an
 * opaque typedef. instr_time can store either an absolute time (of
@@ -15,12 +17,17 @@
 *
 * INSTR_TIME_IS_ZERO(t)			is t equal to zero?
 *
+ * INSTR_TIME_IS_LT(x, y)			x < y
+ *
 * INSTR_TIME_SET_ZERO(t)			set t to zero (memset is acceptable too)
 *
- * INSTR_TIME_SET_CURRENT(t)		set t to current time
+ * INSTR_TIME_SET_CURRENT_FAST(t)	set t to current time without waiting
+ *									for earlier instructions to retire
+ *
+ * INSTR_TIME_SET_CURRENT(t)		set t to current time after waiting
+ *									for earlier instructions to retire
 *
- * INSTR_TIME_SET_CURRENT_LAZY(t)	set t to current time if t is zero,
- *									evaluates to whether t changed
+ * INSTR_TIME_SET_SECONDS(t, s)		set t to s seconds
 *
 * INSTR_TIME_ADD(x, y)			x += y
 *
@@ -80,7 +87,15 @@ typedef struct instr_time
 
 #ifndef WIN32
 
+/*
+ * Make sure this is a power-of-two, so that the compiler can turn the
+ * multiplications and divisions into shifts.
+ */
+#define TICKS_TO_NS_PRECISION (1<<14)
+
+extern int64 ticks_per_ns_scaled;
+extern int64 ticks_per_sec;
+extern int64 max_ticks_no_overflow;
 
 /* Use clock_gettime() */
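To see the scaler in action, a standalone sketch using the frequency from the previous example (values illustrative): the factor for a 3,609,600 kHz TSC becomes 1,000,000 * 16384 / 3,609,600 = 4539, and a 10 microsecond interval (36,096 ticks) converts to 36,096 * 4539 / 16384 = 9,999 ns, a truncation error well below the timer's own noise.

#include <stdint.h>
#include <stdio.h>

#define TICKS_TO_NS_PRECISION (1 << 14)

int
main(void)
{
    int64_t     tsc_freq = 3609600; /* kHz, e.g. a 3.6 GHz TSC */
    int64_t     ticks_per_ns_scaled = INT64_C(1000000) * TICKS_TO_NS_PRECISION / tsc_freq;
    int64_t     ticks = 36096;  /* 10 microseconds at this frequency */

    /* the final division is what INSTR_TIME_GET_NANOSEC() performs */
    printf("scaled factor = %lld, %lld ticks = %lld ns\n",
           (long long) ticks_per_ns_scaled,
           (long long) ticks,
           (long long) (ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION));
    return 0;
}
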
@@ -106,32 +121,124 @@ typedef struct instr_time
 #define PG_INSTR_CLOCK	CLOCK_REALTIME
 #endif
 
-/* helper for INSTR_TIME_SET_CURRENT */
+#if defined(__x86_64__) && defined(__linux__)
+#include <cpuid.h>
+#include <x86intrin.h>
+
+extern bool has_rdtsc;
+extern bool has_rdtscp;
+
+extern void pg_initialize_rdtsc(void);
+#endif
+
 static inline instr_time
-pg_clock_gettime_ns(void)
+pg_clock_gettime(void)
 {
 	instr_time	now;
 	struct timespec tmp;
 
 	clock_gettime(PG_INSTR_CLOCK, &tmp);
 	now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
-
 	return now;
 }
 
+static inline instr_time
+pg_get_ticks_fast(void)
+{
+#if defined(__x86_64__) && defined(__linux__)
+	if (has_rdtsc)
+	{
+		instr_time	now;
+
+		now.ticks = __rdtsc();
+		return now;
+	}
+#endif
+
+	return pg_clock_gettime();
+}
+
+static inline instr_time
+pg_get_ticks(void)
+{
+#if defined(__x86_64__) && defined(__linux__)
+	if (has_rdtscp)
+	{
+		instr_time	now;
+		uint32		unused;
+
+		now.ticks = __rdtscp(&unused);
+		return now;
+	}
+#endif
+
+	return pg_clock_gettime();
+}
+
+static inline int64
+pg_ticks_to_ns(instr_time t)
+{
+	/*
+	 * Would multiplication overflow? If so perform computation in two parts.
+	 * Check overflow without actually overflowing via: a * b > max <=> a >
+	 * max / b
+	 */
+	int64		ns = 0;
+
+	if (unlikely(t.ticks > max_ticks_no_overflow))
+	{
+		/*
+		 * Compute how often the maximum number of ticks fits completely into
+		 * the number of elapsed ticks and convert that number into
+		 * nanoseconds. Then multiply by the count to arrive at the final
+		 * value. In a 2nd step we adjust the number of elapsed ticks and
+		 * convert the remaining ticks.
+		 */
+		int64		count = t.ticks / max_ticks_no_overflow;
+		int64		max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
+
+		ns = max_ns * count;
+
+		/*
+		 * Subtract the ticks that we now already accounted for, so that
+		 * they don't get counted twice.
+		 */
+		t.ticks -= count * max_ticks_no_overflow;
+		Assert(t.ticks >= 0);
+	}
+
+	ns += t.ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
+	return ns;
+}
+
+static inline void
+pg_initialize_get_ticks(void)
+{
+#if defined(__x86_64__) && defined(__linux__)
+	pg_initialize_rdtsc();
+#endif
+}
+
+#define INSTR_TIME_INITIALIZE() \
+	pg_initialize_get_ticks()
+
+#define INSTR_TIME_SET_CURRENT_FAST(t) \
+	((t) = pg_get_ticks_fast())
+
 #define INSTR_TIME_SET_CURRENT(t) \
-	((t) = pg_clock_gettime_ns())
+	((t) = pg_get_ticks())
 
-#define INSTR_TIME_GET_NANOSEC(t) \
-	((int64) (t).ticks)
+#define INSTR_TIME_SET_SECONDS(t, s) \
+	((t).ticks = (s) * ticks_per_sec)
 
+#define INSTR_TIME_GET_NANOSEC(t) \
+	pg_ticks_to_ns(t)
 
 #else							/* WIN32 */
 
 /* Use QueryPerformanceCounter() */
 
-/* helper for INSTR_TIME_SET_CURRENT */
 static inline instr_time
 pg_query_performance_counter(void)
 {
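The two-step path in pg_ticks_to_ns() only triggers for very long intervals: with the factor 4539 from the example above, max_ticks_no_overflow is about 2^63 / 4539, roughly 2.0e15 ticks, which is on the order of 6.5 days at 3.6 GHz. A standalone sketch of the same split, with illustrative values again:

#include <stdint.h>
#include <stdio.h>

#define TICKS_TO_NS_PRECISION (1 << 14)

static int64_t ticks_per_ns_scaled = 4539;  /* 3,609,600 kHz example */
static int64_t max_ticks_no_overflow;

static int64_t
ticks_to_ns(int64_t ticks)
{
    int64_t     ns = 0;

    if (ticks > max_ticks_no_overflow)
    {
        /* convert the largest safe chunk once, then scale by its count */
        int64_t     count = ticks / max_ticks_no_overflow;
        int64_t     max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;

        ns = max_ns * count;
        ticks -= count * max_ticks_no_overflow;
    }
    return ns + ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
}

int
main(void)
{
    /* ~94 days of ticks at 3.6 GHz; the naive multiply would overflow */
    int64_t     ticks = INT64_C(29200000000000000);

    max_ticks_no_overflow = INT64_MAX / ticks_per_ns_scaled;
    printf("%lld ticks = %lld ns\n",
           (long long) ticks, (long long) ticks_to_ns(ticks));
    return 0;
}
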
@@ -153,9 +260,17 @@ GetTimerFrequency(void)
 	return (double) f.QuadPart;
 }
 
+#define INSTR_TIME_INITIALIZE()
+
+#define INSTR_TIME_SET_CURRENT_FAST(t) \
+	((t) = pg_query_performance_counter())
+
 #define INSTR_TIME_SET_CURRENT(t) \
 	((t) = pg_query_performance_counter())
 
+#define INSTR_TIME_SET_SECONDS(t, s) \
+	((t).ticks = (s) * GetTimerFrequency())
+
 #define INSTR_TIME_GET_NANOSEC(t) \
 	((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
 
@@ -168,13 +283,10 @@ GetTimerFrequency(void)
 
 #define INSTR_TIME_IS_ZERO(t)	((t).ticks == 0)
 
+#define INSTR_TIME_IS_LT(x, y)	((x).ticks < (y).ticks)
 
 #define INSTR_TIME_SET_ZERO(t)	((t).ticks = 0)
 
-#define INSTR_TIME_SET_CURRENT_LAZY(t) \
-	(INSTR_TIME_IS_ZERO(t) ? INSTR_TIME_SET_CURRENT(t), true : false)
-
-
 #define INSTR_TIME_ADD(x,y) \
 	((x).ticks += (y).ticks)
 
@@ -184,7 +296,6 @@ GetTimerFrequency(void)
 #define INSTR_TIME_ACCUM_DIFF(x,y,z) \
 	((x).ticks += (y).ticks - (z).ticks)
 
-
 #define INSTR_TIME_GET_DOUBLE(t) \
 	((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)
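Finally, the caller-side idiom these retained macros support: accumulating many short intervals into one counter, as instrument.c does above. A minimal sketch (the function and its names are invented for illustration):

#include "postgres_fe.h"

#include "portability/instr_time.h"

static instr_time total;        /* zero-initialized, i.e. INSTR_TIME_SET_ZERO */

static void
measure_once(void (*op) (void))
{
    instr_time  start,
                end;

    INSTR_TIME_SET_CURRENT_FAST(start);
    op();
    INSTR_TIME_SET_CURRENT_FAST(end);

    /* total.ticks += end.ticks - start.ticks, via the opaque type */
    INSTR_TIME_ACCUM_DIFF(total, end, start);
}

Ticks are converted to nanoseconds only once, at the end, via INSTR_TIME_GET_NANOSEC() or INSTR_TIME_GET_DOUBLE(), which keeps the conversion cost of pg_ticks_to_ns() off the hot path.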