|
| 1 | +/*------------------------------------------------------------------------- |
| 2 | + * |
| 3 | + * shm_toc.c |
| 4 | + * shared memory segment table of contents |
| 5 | + * |
| 6 | + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group |
| 7 | + * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | + * |
 *	  src/backend/storage/ipc/shm_toc.c
| 10 | + * |
| 11 | + *------------------------------------------------------------------------- |
| 12 | + */ |
| 13 | + |
| 14 | +#include "postgres.h" |
| 15 | + |
| 16 | +#include "storage/barrier.h" |
| 17 | +#include "storage/shm_toc.h" |
| 18 | +#include "storage/spin.h" |
| 19 | + |
/*
 * One table-of-contents entry: maps a caller-chosen 64-bit key to the byte
 * offset of a data structure within the segment.  Offsets are stored rather
 * than pointers because the segment may be mapped at different addresses in
 * different processes (see shm_toc_insert/shm_toc_lookup).
 */
typedef struct shm_toc_entry
{
	uint64		key;			/* Arbitrary identifier */
	uint64		offset;			/* Bytes offset */
} shm_toc_entry;
| 25 | + |
/*
 * Table-of-contents header, placed at the very start of the managed region.
 *
 * The entry array grows forward from the header, while shm_toc_allocate()
 * carves chunks backward from the end of the region; the two meet in the
 * middle when the segment is exhausted.
 *
 * toc_mutex protects toc_allocated_bytes and toc_nentry for writers.
 * toc_nentry is also read without the lock by shm_toc_lookup(), relying on
 * the write barrier in shm_toc_insert().
 */
struct shm_toc
{
	uint64		toc_magic;		/* Magic number for this TOC */
	slock_t		toc_mutex;		/* Spinlock for mutual exclusion */
	Size		toc_total_bytes;	/* Bytes managed by this TOC */
	Size		toc_allocated_bytes;	/* Bytes allocated of those managed */
	Size		toc_nentry;		/* Number of entries in TOC */
	shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER];
};
| 35 | + |
| 36 | +/* |
| 37 | + * Initialize a region of shared memory with a table of contents. |
| 38 | + */ |
| 39 | +shm_toc * |
| 40 | +shm_toc_create(uint64 magic, void *address, Size nbytes) |
| 41 | +{ |
| 42 | + shm_toc *toc = (shm_toc *) address; |
| 43 | + |
| 44 | + Assert(nbytes > offsetof(shm_toc, toc_entry)); |
| 45 | + toc->toc_magic = magic; |
| 46 | + SpinLockInit(&toc->toc_mutex); |
| 47 | + toc->toc_total_bytes = nbytes; |
| 48 | + toc->toc_allocated_bytes = 0; |
| 49 | + toc->toc_nentry = 0; |
| 50 | + |
| 51 | + return toc; |
| 52 | +} |
| 53 | + |
| 54 | +/* |
| 55 | + * Attach to an existing table of contents. If the magic number found at |
| 56 | + * the target address doesn't match our expectations, returns NULL. |
| 57 | + */ |
| 58 | +extern shm_toc * |
| 59 | +shm_toc_attach(uint64 magic, void *address) |
| 60 | +{ |
| 61 | + shm_toc *toc = (shm_toc *) address; |
| 62 | + |
| 63 | + if (toc->toc_magic != magic) |
| 64 | + return NULL; |
| 65 | + |
| 66 | + Assert(toc->toc_total_bytes >= toc->toc_allocated_bytes); |
| 67 | + Assert(toc->toc_total_bytes >= offsetof(shm_toc, toc_entry)); |
| 68 | + |
| 69 | + return toc; |
| 70 | +} |
| 71 | + |
/*
 * Allocate shared memory from a segment managed by a table of contents.
 *
 * This is not a full-blown allocator; there's no way to free memory.  It's
 * just a way of dividing a single physical shared memory segment into logical
 * chunks that may be used for different purposes.
 *
 * We allocate backwards from the end of the segment, so that the TOC entries
 * can grow forward from the start of the segment.
 *
 * Raises an "out of shared memory" error if the request cannot be satisfied.
 *
 * NOTE(review): returned pointers are buffer-aligned only if toc_total_bytes
 * is itself buffer-aligned — confirm against shm_toc_create's caller.
 */
extern void *
shm_toc_allocate(shm_toc *toc, Size nbytes)
{
	/* Access mutable header fields through a volatile pointer while locked. */
	volatile shm_toc *vtoc = toc;
	Size		total_bytes;
	Size		allocated_bytes;
	Size		nentry;
	Size		toc_bytes;

	/* Make sure request is well-aligned. */
	nbytes = BUFFERALIGN(nbytes);

	SpinLockAcquire(&toc->toc_mutex);

	/* Snapshot the header state under the lock. */
	total_bytes = vtoc->toc_total_bytes;
	allocated_bytes = vtoc->toc_allocated_bytes;
	nentry = vtoc->toc_nentry;
	/* Space consumed so far: header + entry array + prior allocations. */
	toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
		+ allocated_bytes;

	/* Check for memory exhaustion and overflow. */
	if (toc_bytes + nbytes > total_bytes || toc_bytes + nbytes < toc_bytes)
	{
		/* Release the spinlock before erroring out; ereport doesn't return. */
		SpinLockRelease(&toc->toc_mutex);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	}
	vtoc->toc_allocated_bytes += nbytes;

	SpinLockRelease(&toc->toc_mutex);

	/*
	 * allocated_bytes is the pre-update snapshot, so this points at the
	 * start of the chunk just reserved at the tail of the segment.
	 */
	return ((char *) toc) + (total_bytes - allocated_bytes - nbytes);
}
| 116 | + |
| 117 | +/* |
| 118 | + * Return the number of bytes that can still be allocated. |
| 119 | + */ |
| 120 | +extern Size |
| 121 | +shm_toc_freespace(shm_toc *toc) |
| 122 | +{ |
| 123 | + volatile shm_toc *vtoc = toc; |
| 124 | + Size total_bytes; |
| 125 | + Size allocated_bytes; |
| 126 | + Size nentry; |
| 127 | + Size toc_bytes; |
| 128 | + |
| 129 | + SpinLockAcquire(&toc->toc_mutex); |
| 130 | + total_bytes = vtoc->toc_total_bytes; |
| 131 | + allocated_bytes = vtoc->toc_allocated_bytes; |
| 132 | + nentry = vtoc->toc_nentry; |
| 133 | + SpinLockRelease(&toc->toc_mutex); |
| 134 | + |
| 135 | + toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry); |
| 136 | + Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes); |
| 137 | + return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes)); |
| 138 | +} |
| 139 | + |
/*
 * Insert a TOC entry.
 *
 * The idea here is that the process setting up the shared memory segment will
 * register the addresses of data structures within the segment using this
 * function.  Each data structure will be identified using a 64-bit key, which
 * is assumed to be a well-known or discoverable integer.  Other processes
 * accessing the shared memory segment can pass the same key to
 * shm_toc_lookup() to discover the addresses of those data structures.
 *
 * Since the shared memory segment may be mapped at different addresses within
 * different backends, we store relative rather than absolute pointers.
 *
 * This won't scale well to a large number of keys.  Hopefully, that isn't
 * necessary; if it proves to be, we might need to provide a more sophisticated
 * data structure here.  But the real idea here is just to give someone mapping
 * a dynamic shared memory the ability to find the bare minimum number of
 * pointers that they need to bootstrap.  If you're storing a lot of stuff in
 * here, you're doing it wrong.
 *
 * Raises an "out of shared memory" error if no room remains for the entry.
 */
void
shm_toc_insert(shm_toc *toc, uint64 key, void *address)
{
	/* Access mutable header fields through a volatile pointer while locked. */
	volatile shm_toc *vtoc = toc;
	uint64		total_bytes;
	uint64		allocated_bytes;
	uint64		nentry;
	uint64		toc_bytes;
	uint64		offset;

	/* Relativize pointer.  The address must lie within the segment. */
	Assert(address > (void *) toc);
	offset = ((char *) address) - (char *) toc;

	SpinLockAcquire(&toc->toc_mutex);

	/* Snapshot the header state under the lock. */
	total_bytes = vtoc->toc_total_bytes;
	allocated_bytes = vtoc->toc_allocated_bytes;
	nentry = vtoc->toc_nentry;
	/* Space consumed so far: header + entry array + allocations. */
	toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
		+ allocated_bytes;

	/* Check for memory exhaustion and overflow. */
	if (toc_bytes + sizeof(shm_toc_entry) > total_bytes ||
		toc_bytes + sizeof(shm_toc_entry) < toc_bytes)
	{
		/* Release the spinlock before erroring out; ereport doesn't return. */
		SpinLockRelease(&toc->toc_mutex);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	}

	/* Fill in the new entry at the current end of the array. */
	Assert(offset < total_bytes);
	vtoc->toc_entry[nentry].key = key;
	vtoc->toc_entry[nentry].offset = offset;

	/*
	 * By placing a write barrier after filling in the entry and before
	 * updating the number of entries, we make it safe to read the TOC
	 * unlocked: shm_toc_lookup() never sees a count that includes a
	 * partially-written entry.
	 */
	pg_write_barrier();

	vtoc->toc_nentry++;

	SpinLockRelease(&toc->toc_mutex);
}
| 207 | + |
| 208 | +/* |
| 209 | + * Look up a TOC entry. |
| 210 | + * |
| 211 | + * Unlike the other functions in this file, this operation acquires no lock; |
| 212 | + * it uses only barriers. It probably wouldn't hurt concurrency very much even |
| 213 | + * if it did get a lock, but since it's reasonably likely that a group of |
| 214 | + * worker processes could each read a series of entries from the same TOC |
| 215 | + * right around the same time, there seems to be some value in avoiding it. |
| 216 | + */ |
| 217 | +void * |
| 218 | +shm_toc_lookup(shm_toc *toc, uint64 key) |
| 219 | +{ |
| 220 | + uint64 nentry; |
| 221 | + uint64 i; |
| 222 | + |
| 223 | + /* Read the number of entries before we examine any entry. */ |
| 224 | + nentry = toc->toc_nentry; |
| 225 | + pg_read_barrier(); |
| 226 | + |
| 227 | + /* Now search for a matching entry. */ |
| 228 | + for (i = 0; i < nentry; ++i) |
| 229 | + if (toc->toc_entry[i].key == key) |
| 230 | + return ((char *) toc) + toc->toc_entry[i].offset; |
| 231 | + |
| 232 | + /* No matching entry was found. */ |
| 233 | + return NULL; |
| 234 | +} |
| 235 | + |
| 236 | +/* |
| 237 | + * Estimate how much shared memory will be required to store a TOC and its |
| 238 | + * dependent data structures. |
| 239 | + */ |
| 240 | +Size |
| 241 | +shm_toc_estimate(shm_toc_estimator *e) |
| 242 | +{ |
| 243 | + return add_size(offsetof(shm_toc, toc_entry), |
| 244 | + add_size(mul_size(e->number_of_keys, sizeof(shm_toc_entry)), |
| 245 | + e->space_for_chunks)); |
| 246 | +} |
0 commit comments