|
| 1 | +/*------------------------------------------------------------------------- |
| 2 | + * |
| 3 | + * pmchild.c |
| 4 | + * Functions for keeping track of postmaster child processes. |
| 5 | + * |
| 6 | + * Postmaster keeps track of all child processes so that when a process exits, |
| 7 | + * it knows what kind of a process it was and can clean up accordingly. Every |
| 8 | + * child process is allocated a PMChild struct from a fixed pool of structs. |
| 9 | + * The size of the pool is determined by various settings that configure how |
| 10 | + * many worker processes and backend connections are allowed, i.e. |
| 11 | + * autovacuum_max_workers, max_worker_processes, max_wal_senders, and |
| 12 | + * max_connections. |
| 13 | + * |
| 14 | + * Dead-end backends are handled slightly differently. There is no limit |
| 15 | + * on the number of dead-end backends, and they do not need unique IDs, so |
| 16 | + * their PMChild structs are allocated dynamically, not from a pool. |
| 17 | + * |
| 18 | + * The structures and functions in this file are private to the postmaster |
| 19 | + * process. But note that there is an array in shared memory, managed by |
| 20 | + * pmsignal.c, that mirrors this. |
| 21 | + * |
| 22 | + * |
| 23 | + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group |
| 24 | + * Portions Copyright (c) 1994, Regents of the University of California |
| 25 | + * |
| 26 | + * IDENTIFICATION |
| 27 | + * src/backend/postmaster/pmchild.c |
| 28 | + * |
| 29 | + *------------------------------------------------------------------------- |
| 30 | + */ |
| 31 | + |
| 32 | +#include "postgres.h" |
| 33 | + |
| 34 | +#include "miscadmin.h" |
| 35 | +#include "postmaster/autovacuum.h" |
| 36 | +#include "postmaster/postmaster.h" |
| 37 | +#include "replication/walsender.h" |
| 38 | +#include "storage/pmsignal.h" |
| 39 | +#include "storage/proc.h" |
| 40 | + |
| 41 | +/* |
| 42 | + * Freelists for different kinds of child processes. We maintain separate |
| 43 | + * pools for each, so that for example launching a lot of regular backends |
| 44 | + * cannot prevent autovacuum or an aux process from launching. |
| 45 | + */ |
| 46 | +typedef struct PMChildPool |
| 47 | +{ |
| 48 | + int size; /* number of PMChild slots reserved for this |
| 49 | + * kind of processes */ |
| 50 | + int first_slotno; /* first slot belonging to this pool */ |
| 51 | + dlist_head freelist; /* currently unused PMChild entries */ |
| 52 | +} PMChildPool; |
| 53 | + |
| 54 | +static PMChildPool pmchild_pools[BACKEND_NUM_TYPES]; |
| 55 | +NON_EXEC_STATIC int num_pmchild_slots = 0; |
| 56 | + |
| 57 | +/* |
| 58 | + * List of active child processes. This includes dead-end children. |
| 59 | + */ |
| 60 | +dlist_head ActiveChildList; |
| 61 | + |
| 62 | +/* |
| 63 | + * MaxLivePostmasterChildren |
| 64 | + * |
| 65 | + * This reports the number of postmaster child processes that can be active. |
| 66 | + * It includes all children except for dead-end children. This allows the |
| 67 | + * array in shared memory (PMChildFlags) to have a fixed maximum size. |
| 68 | + */ |
| 69 | +int |
| 70 | +MaxLivePostmasterChildren(void) |
| 71 | +{ |
| 72 | + if (num_pmchild_slots == 0) |
| 73 | + elog(ERROR, "PM child array not initialized yet"); |
| 74 | + return num_pmchild_slots; |
| 75 | +} |
| 76 | + |
| 77 | +/* |
| 78 | + * Initialize at postmaster startup |
| 79 | + * |
| 80 | + * Note: This is not called on crash restart. We rely on PMChild entries to |
| 81 | + * remain valid through the restart process. This is important because the |
| 82 | + * syslogger survives through the crash restart process, so we must not |
| 83 | + * invalidate its PMChild slot. |
| 84 | + */ |
| 85 | +void |
| 86 | +InitPostmasterChildSlots(void) |
| 87 | +{ |
| 88 | + int slotno; |
| 89 | + PMChild *slots; |
| 90 | + |
| 91 | + /* |
| 92 | + * We allow more connections here than we can have backends because some |
| 93 | + * might still be authenticating; they might fail auth, or some existing |
| 94 | + * backend might exit before the auth cycle is completed. The exact |
| 95 | + * MaxConnections limit is enforced when a new backend tries to join the |
| 96 | + * PGPROC array. |
| 97 | + * |
| 98 | + * WAL senders start out as regular backends, so they share the same pool. |
| 99 | + */ |
| 100 | + pmchild_pools[B_BACKEND].size = 2 * (MaxConnections + max_wal_senders); |
| 101 | + |
| 102 | + pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_max_workers; |
| 103 | + pmchild_pools[B_BG_WORKER].size = max_worker_processes; |
| 104 | + |
| 105 | + /* |
| 106 | + * There can be only one of each of these running at a time. They each |
| 107 | + * get their own pool of just one entry. |
| 108 | + */ |
| 109 | + pmchild_pools[B_AUTOVAC_LAUNCHER].size = 1; |
| 110 | + pmchild_pools[B_SLOTSYNC_WORKER].size = 1; |
| 111 | + pmchild_pools[B_ARCHIVER].size = 1; |
| 112 | + pmchild_pools[B_BG_WRITER].size = 1; |
| 113 | + pmchild_pools[B_CHECKPOINTER].size = 1; |
| 114 | + pmchild_pools[B_STARTUP].size = 1; |
| 115 | + pmchild_pools[B_WAL_RECEIVER].size = 1; |
| 116 | + pmchild_pools[B_WAL_SUMMARIZER].size = 1; |
| 117 | + pmchild_pools[B_WAL_WRITER].size = 1; |
| 118 | + pmchild_pools[B_LOGGER].size = 1; |
| 119 | + |
| 120 | + /* The rest of the pmchild_pools are left at zero size */ |
| 121 | + |
| 122 | + /* Count the total number of slots */ |
| 123 | + num_pmchild_slots = 0; |
| 124 | + for (int i = 0; i < BACKEND_NUM_TYPES; i++) |
| 125 | + num_pmchild_slots += pmchild_pools[i].size; |
| 126 | + |
| 127 | + /* Initialize them */ |
| 128 | + slots = palloc(num_pmchild_slots * sizeof(PMChild)); |
| 129 | + slotno = 0; |
| 130 | + for (int btype = 0; btype < BACKEND_NUM_TYPES; btype++) |
| 131 | + { |
| 132 | + pmchild_pools[btype].first_slotno = slotno + 1; |
| 133 | + dlist_init(&pmchild_pools[btype].freelist); |
| 134 | + |
| 135 | + for (int j = 0; j < pmchild_pools[btype].size; j++) |
| 136 | + { |
| 137 | + slots[slotno].pid = 0; |
| 138 | + slots[slotno].child_slot = slotno + 1; |
| 139 | + slots[slotno].bkend_type = B_INVALID; |
| 140 | + slots[slotno].rw = NULL; |
| 141 | + slots[slotno].bgworker_notify = false; |
| 142 | + dlist_push_tail(&pmchild_pools[btype].freelist, &slots[slotno].elem); |
| 143 | + slotno++; |
| 144 | + } |
| 145 | + } |
| 146 | + Assert(slotno == num_pmchild_slots); |
| 147 | + |
| 148 | + /* Initialize other structures */ |
| 149 | + dlist_init(&ActiveChildList); |
| 150 | +} |
| 151 | + |
| 152 | +/* |
| 153 | + * Allocate a PMChild entry for a postmaster child process of given type. |
| 154 | + * |
| 155 | + * The entry is taken from the right pool for the type. |
| 156 | + * |
| 157 | + * pmchild->child_slot in the returned struct is unique among all active child |
| 158 | + * processes. |
| 159 | + */ |
| 160 | +PMChild * |
| 161 | +AssignPostmasterChildSlot(BackendType btype) |
| 162 | +{ |
| 163 | + dlist_head *freelist; |
| 164 | + PMChild *pmchild; |
| 165 | + |
| 166 | + if (pmchild_pools[btype].size == 0) |
| 167 | + elog(ERROR, "cannot allocate a PMChild slot for backend type %d", btype); |
| 168 | + |
| 169 | + freelist = &pmchild_pools[btype].freelist; |
| 170 | + if (dlist_is_empty(freelist)) |
| 171 | + return NULL; |
| 172 | + |
| 173 | + pmchild = dlist_container(PMChild, elem, dlist_pop_head_node(freelist)); |
| 174 | + pmchild->pid = 0; |
| 175 | + pmchild->bkend_type = btype; |
| 176 | + pmchild->rw = NULL; |
| 177 | + pmchild->bgworker_notify = true; |
| 178 | + |
| 179 | + /* |
| 180 | + * pmchild->child_slot for each entry was initialized when the array of |
| 181 | + * slots was allocated. Sanity check it. |
| 182 | + */ |
| 183 | + if (!(pmchild->child_slot >= pmchild_pools[btype].first_slotno && |
| 184 | + pmchild->child_slot < pmchild_pools[btype].first_slotno + pmchild_pools[btype].size)) |
| 185 | + { |
| 186 | + elog(ERROR, "pmchild freelist for backend type %d is corrupt", |
| 187 | + pmchild->bkend_type); |
| 188 | + } |
| 189 | + |
| 190 | + dlist_push_head(&ActiveChildList, &pmchild->elem); |
| 191 | + |
| 192 | + /* Update the status in the shared memory array */ |
| 193 | + MarkPostmasterChildSlotAssigned(pmchild->child_slot); |
| 194 | + |
| 195 | + elog(DEBUG2, "assigned pm child slot %d for %s", |
| 196 | + pmchild->child_slot, PostmasterChildName(btype)); |
| 197 | + |
| 198 | + return pmchild; |
| 199 | +} |
| 200 | + |
| 201 | +/* |
| 202 | + * Allocate a PMChild struct for a dead-end backend. Dead-end children are |
| 203 | + * not assigned a child_slot number. The struct is palloc'd; returns NULL if |
| 204 | + * out of memory. |
| 205 | + */ |
| 206 | +PMChild * |
| 207 | +AllocDeadEndChild(void) |
| 208 | +{ |
| 209 | + PMChild *pmchild; |
| 210 | + |
| 211 | + elog(DEBUG2, "allocating dead-end child"); |
| 212 | + |
| 213 | + pmchild = (PMChild *) palloc_extended(sizeof(PMChild), MCXT_ALLOC_NO_OOM); |
| 214 | + if (pmchild) |
| 215 | + { |
| 216 | + pmchild->pid = 0; |
| 217 | + pmchild->child_slot = 0; |
| 218 | + pmchild->bkend_type = B_DEAD_END_BACKEND; |
| 219 | + pmchild->rw = NULL; |
| 220 | + pmchild->bgworker_notify = false; |
| 221 | + |
| 222 | + dlist_push_head(&ActiveChildList, &pmchild->elem); |
| 223 | + } |
| 224 | + |
| 225 | + return pmchild; |
| 226 | +} |
| 227 | + |
| 228 | +/* |
| 229 | + * Release a PMChild slot, after the child process has exited. |
| 230 | + * |
| 231 | + * Returns true if the child detached cleanly from shared memory, false |
| 232 | + * otherwise (see MarkPostmasterChildSlotUnassigned). |
| 233 | + */ |
| 234 | +bool |
| 235 | +ReleasePostmasterChildSlot(PMChild *pmchild) |
| 236 | +{ |
| 237 | + dlist_delete(&pmchild->elem); |
| 238 | + if (pmchild->bkend_type == B_DEAD_END_BACKEND) |
| 239 | + { |
| 240 | + elog(DEBUG2, "releasing dead-end backend"); |
| 241 | + pfree(pmchild); |
| 242 | + return true; |
| 243 | + } |
| 244 | + else |
| 245 | + { |
| 246 | + PMChildPool *pool; |
| 247 | + |
| 248 | + elog(DEBUG2, "releasing pm child slot %d", pmchild->child_slot); |
| 249 | + |
| 250 | + /* WAL senders start out as regular backends, and share the pool */ |
| 251 | + if (pmchild->bkend_type == B_WAL_SENDER) |
| 252 | + pool = &pmchild_pools[B_BACKEND]; |
| 253 | + else |
| 254 | + pool = &pmchild_pools[pmchild->bkend_type]; |
| 255 | + |
| 256 | + /* sanity check that we return the entry to the right pool */ |
| 257 | + if (!(pmchild->child_slot >= pool->first_slotno && |
| 258 | + pmchild->child_slot < pool->first_slotno + pool->size)) |
| 259 | + { |
| 260 | + elog(ERROR, "pmchild freelist for backend type %d is corrupt", |
| 261 | + pmchild->bkend_type); |
| 262 | + } |
| 263 | + |
| 264 | + dlist_push_head(&pool->freelist, &pmchild->elem); |
| 265 | + return MarkPostmasterChildSlotUnassigned(pmchild->child_slot); |
| 266 | + } |
| 267 | +} |
| 268 | + |
| 269 | +/* |
| 270 | + * Find the PMChild entry of a running child process by PID. |
| 271 | + */ |
| 272 | +PMChild * |
| 273 | +FindPostmasterChildByPid(int pid) |
| 274 | +{ |
| 275 | + dlist_iter iter; |
| 276 | + |
| 277 | + dlist_foreach(iter, &ActiveChildList) |
| 278 | + { |
| 279 | + PMChild *bp = dlist_container(PMChild, elem, iter.cur); |
| 280 | + |
| 281 | + if (bp->pid == pid) |
| 282 | + return bp; |
| 283 | + } |
| 284 | + return NULL; |
| 285 | +} |
0 commit comments