diff --git a/README.md b/README.md index d8b03dc..ddced01 100644 --- a/README.md +++ b/README.md @@ -133,13 +133,14 @@ in-memory hash table. The work of wait event statistics collector worker is controlled by following GUCs. -| Parameter name | Data type | Description | Default value | -| ----------------------------------- | --------- | ------------------------------------------- | ------------: | -| pg_wait_sampling.history_size | int4 | Size of history in-memory ring buffer | 5000 | -| pg_wait_sampling.history_period | int4 | Period for history sampling in milliseconds | 10 | -| pg_wait_sampling.profile_period | int4 | Period for profile sampling in milliseconds | 10 | -| pg_wait_sampling.profile_pid | bool | Whether profile should be per pid | true | -| pg_wait_sampling.profile_queries | bool | Whether profile should be per query | true | +| Parameter name | Data type | Description | Default value | +|----------------------------------| --------- |---------------------------------------------|--------------:| +| pg_wait_sampling.history_size | int4 | Size of history in-memory ring buffer | 5000 | +| pg_wait_sampling.history_period | int4 | Period for history sampling in milliseconds | 10 | +| pg_wait_sampling.profile_period | int4 | Period for profile sampling in milliseconds | 10 | +| pg_wait_sampling.profile_pid | bool | Whether profile should be per pid | true | +| pg_wait_sampling.profile_queries | bool | Whether profile should be per query | true | +| pg_wait_sampling.sample_cpu | bool | Whether on CPU backends should be sampled | true | If `pg_wait_sampling.profile_pid` is set to false, sampling profile wouldn't be collected in per-process manner. In this case the value of pid could would @@ -148,6 +149,10 @@ be always zero and corresponding row contain samples among all the processes. While `pg_wait_sampling.profile_queries` is set to false `queryid` field in views will be zero. 
+If `pg_wait_sampling.sample_cpu` is set to true then processes that are not +waiting on anything are also sampled. The wait event columns for such processes +will be NULL. + These GUCs are allowed to be changed by superuser. Also, they are placed into shared memory. Thus, they could be changed from any backend and affects worker runtime. diff --git a/collector.c b/collector.c index dcb9695..215bebc 100644 --- a/collector.c +++ b/collector.c @@ -163,10 +163,7 @@ probe_waits(History *observations, HTAB *profile_hash, *observation; PGPROC *proc = &ProcGlobal->allProcs[i]; - if (proc->pid == 0) - continue; - - if (proc->wait_event_info == 0) + if (!pgws_should_sample_proc(proc)) continue; /* Collect next wait event sample */ diff --git a/pg_wait_sampling.c b/pg_wait_sampling.c index 268b40e..f226728 100644 --- a/pg_wait_sampling.c +++ b/pg_wait_sampling.c @@ -198,7 +198,8 @@ setup_gucs() history_period_found = false, profile_period_found = false, profile_pid_found = false, - profile_queries_found = false; + profile_queries_found = false, + sample_cpu_found = false; get_guc_variables_compat(&guc_vars, &numOpts); @@ -240,6 +241,12 @@ setup_gucs() var->_bool.variable = &pgws_collector_hdr->profileQueries; pgws_collector_hdr->profileQueries = true; } + else if (!strcmp(name, "pg_wait_sampling.sample_cpu")) + { + sample_cpu_found = true; + var->_bool.variable = &pgws_collector_hdr->sampleCpu; + pgws_collector_hdr->sampleCpu = true; + } } if (!history_size_found) @@ -272,11 +279,18 @@ setup_gucs() &pgws_collector_hdr->profileQueries, true, PGC_SUSET, 0, shmem_bool_guc_check_hook, NULL, NULL); + if (!sample_cpu_found) + DefineCustomBoolVariable("pg_wait_sampling.sample_cpu", + "Sets whether not waiting backends should be sampled.", NULL, + &pgws_collector_hdr->sampleCpu, true, + PGC_SUSET, 0, shmem_bool_guc_check_hook, NULL, NULL); + if (history_size_found || history_period_found || profile_period_found || profile_pid_found - || profile_queries_found) + || 
profile_queries_found + || sample_cpu_found) { ProcessConfigFile(PGC_SIGHUP); } @@ -436,6 +450,28 @@ search_proc(int pid) return NULL; } +/* + * Decide whether this PGPROC entry should be included in profiles and output + * views. Returns false if the entry has no wait event (wait_event_info == 0) while pgws_collector_hdr->sampleCpu is off, if its pid or procLatch.owner_pid is 0, or if its pid equals MyProcPid; returns true otherwise. + */ +bool +pgws_should_sample_proc(PGPROC *proc) +{ + if (proc->wait_event_info == 0 && !pgws_collector_hdr->sampleCpu) + return false; + + /* + * On PostgreSQL versions < 17 the PGPROC->pid field is not reset on + * process exit. This would lead to such processes getting counted for + * null wait events. So instead we make use of DisownLatch() resetting + * owner_pid during ProcKill(). The pid == MyProcPid test additionally excludes the entry of the process that is performing this check. + */ + if (proc->pid == 0 || proc->procLatch.owner_pid == 0 || proc->pid == MyProcPid) + return false; + + return true; +} + typedef struct { HistoryItem *items; @@ -501,13 +537,13 @@ pg_wait_sampling_get_current(PG_FUNCTION_ARGS) { PGPROC *proc = &ProcGlobal->allProcs[i]; - if (proc != NULL && proc->pid != 0 && proc->wait_event_info) - { - params->items[j].pid = proc->pid; - params->items[j].wait_event_info = proc->wait_event_info; - params->items[j].queryId = pgws_proc_queryids[i]; - j++; - } + if (!pgws_should_sample_proc(proc)) + continue; + + params->items[j].pid = proc->pid; + params->items[j].wait_event_info = proc->wait_event_info; + params->items[j].queryId = pgws_proc_queryids[i]; + j++; } funcctx->max_calls = j; } diff --git a/pg_wait_sampling.h b/pg_wait_sampling.h index 29425fc..8ff31f8 100644 --- a/pg_wait_sampling.h +++ b/pg_wait_sampling.h @@ -68,6 +68,7 @@ typedef struct int profilePeriod; bool profilePid; bool profileQueries; + bool sampleCpu; } CollectorShmqHeader; /* pg_wait_sampling.c */ @@ -75,6 +76,7 @@ extern CollectorShmqHeader *pgws_collector_hdr; extern shm_mq *pgws_collector_mq; extern uint64 *pgws_proc_queryids; extern void pgws_init_lock_tag(LOCKTAG *tag, uint32 lock); +extern bool pgws_should_sample_proc(PGPROC *proc); /* collector.c */ extern void pgws_register_wait_collector(void);