Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b937dd2

Browse files
author
Commitfest Bot
committed
[CF 5620] v10 - Improve monitoring of shared memory allocations
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5620
The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch.
Patch(es): https://www.postgresql.org/message-id/CAH2L28vgzvTUqNwQay=jx4w30sHMx_pC+emnZErv8oX0R+SALQ@mail.gmail.com
Author(s): Rahila Syed
2 parents 3c4d755 + fe46124 commit b937dd2

File tree

3 files changed

+198
-51
lines changed

3 files changed

+198
-51
lines changed

src/backend/storage/ipc/shmem.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
#include "storage/shmem.h"
7575
#include "storage/spin.h"
7676
#include "utils/builtins.h"
77+
#include "utils/dynahash.h"
7778

7879
static void *ShmemAllocRaw(Size size, Size *allocated_size);
7980

@@ -351,7 +352,8 @@ ShmemInitHash(const char *name, /* table string name for shmem index */
351352

352353
/* look it up in the shmem index */
353354
location = ShmemInitStruct(name,
354-
hash_get_shared_size(infoP, hash_flags),
355+
hash_get_shared_size(infoP, hash_flags,
356+
init_size),
355357
&found);
356358

357359
/*

src/backend/utils/hash/dynahash.c

Lines changed: 193 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -260,12 +260,39 @@ static long hash_accesses,
260260
hash_expansions;
261261
#endif
262262

263+
/* access to parts of the hash table, allocated as a single chunk */
264+
#define HASH_DIRECTORY_PTR(hashp) \
265+
(((char *) (hashp)->hctl) + sizeof(HASHHDR))
266+
267+
#define HASH_SEGMENT_OFFSET(hctl, idx) \
268+
(sizeof(HASHHDR) + \
269+
((hctl)->dsize * sizeof(HASHSEGMENT)) + \
270+
((hctl)->ssize * (idx) * sizeof(HASHBUCKET)))
271+
272+
#define HASH_SEGMENT_PTR(hashp, idx) \
273+
((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, (idx)))
274+
275+
#define HASH_SEGMENT_SIZE(hashp) \
276+
((hashp)->ssize * sizeof(HASHBUCKET))
277+
278+
#define HASH_ELEMENTS_PTR(hashp, nsegs) \
279+
((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, nsegs))
280+
281+
/* Each element has a HASHELEMENT header plus user data. */
282+
#define HASH_ELEMENT_SIZE(hctl) \
283+
(MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN((hctl)->entrysize))
284+
285+
#define HASH_ELEMENT_NEXT(hctl, num, ptr) \
286+
((char *) (ptr) + ((num) * HASH_ELEMENT_SIZE(hctl)))
287+
263288
/*
264289
* Private function prototypes
265290
*/
266291
static void *DynaHashAlloc(Size size);
267292
static HASHSEGMENT seg_alloc(HTAB *hashp);
268-
static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
293+
static HASHELEMENT *element_alloc(HTAB *hashp, int nelem);
294+
static void element_add(HTAB *hashp, HASHELEMENT *firstElement,
295+
int nelem, int freelist_idx);
269296
static bool dir_realloc(HTAB *hashp);
270297
static bool expand_table(HTAB *hashp);
271298
static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
@@ -280,6 +307,9 @@ static int next_pow2_int(long num);
280307
static void register_seq_scan(HTAB *hashp);
281308
static void deregister_seq_scan(HTAB *hashp);
282309
static bool has_seq_scans(HTAB *hashp);
310+
static void compute_buckets_and_segs(long nelem, long num_partitions,
311+
long ssize,
312+
int *nbuckets, int *nsegments);
283313

284314

285315
/*
@@ -569,12 +599,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
569599
elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
570600

571601
/*
602+
* For a private hash table, preallocate the requested number of elements
603+
* if it's less than our chosen nelem_alloc. This avoids wasting space if
604+
* the caller correctly estimates a small table size.
605+
*
572606
* For a shared hash table, preallocate the requested number of elements.
573607
* This reduces problems with run-time out-of-shared-memory conditions.
574-
*
575-
* For a non-shared hash table, preallocate the requested number of
576-
* elements if it's less than our chosen nelem_alloc. This avoids wasting
577-
* space if the caller correctly estimates a small table size.
578608
*/
579609
if ((flags & HASH_SHARED_MEM) ||
580610
nelem < hctl->nelem_alloc)
@@ -583,6 +613,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
583613
freelist_partitions,
584614
nelem_alloc,
585615
nelem_alloc_first;
616+
void *ptr = NULL;
586617

587618
/*
588619
* If hash table is partitioned, give each freelist an equal share of
@@ -607,14 +638,42 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
607638
else
608639
nelem_alloc_first = nelem_alloc;
609640

641+
/*
642+
* For a shared hash table, calculate the offset at which to find the
643+
* first partition of elements. We have to skip space for the header,
644+
* segments and buckets.
645+
*/
646+
if (hashp->isshared)
647+
ptr = HASH_ELEMENTS_PTR(hashp, hctl->nsegs);
648+
610649
for (i = 0; i < freelist_partitions; i++)
611650
{
612651
int temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
613652

614-
if (!element_alloc(hashp, temp, i))
615-
ereport(ERROR,
616-
(errcode(ERRCODE_OUT_OF_MEMORY),
617-
errmsg("out of memory")));
653+
/*
654+
* Assign the correct location of each partition within a
655+
* pre-allocated buffer.
656+
*
657+
* Actual memory allocation happens in ShmemInitHash for shared
658+
* hash tables.
659+
*
660+
* We just need to split that allocation into per-batch freelists.
661+
*/
662+
if (hashp->isshared)
663+
{
664+
element_add(hashp, (HASHELEMENT *) ptr, temp, i);
665+
ptr = HASH_ELEMENT_NEXT(hctl, temp, ptr);
666+
}
667+
else
668+
{
669+
HASHELEMENT *firstElement = element_alloc(hashp, temp);
670+
671+
if (!firstElement)
672+
ereport(ERROR,
673+
(errcode(ERRCODE_OUT_OF_MEMORY),
674+
errmsg("out of memory")));
675+
element_add(hashp, firstElement, temp, i);
676+
}
618677
}
619678
}
620679

@@ -703,29 +762,16 @@ init_htab(HTAB *hashp, long nelem)
703762
SpinLockInit(&(hctl->freeList[i].mutex));
704763

705764
/*
706-
* Allocate space for the next greater power of two number of buckets,
707-
* assuming a desired maximum load factor of 1.
765+
* We've already calculated these parameters when we calculated how much
766+
* space to allocate in hash_get_shared_size(). Be careful to keep these
767+
* two places in sync, so that we get the same parameters.
708768
*/
709-
nbuckets = next_pow2_int(nelem);
710-
711-
/*
712-
* In a partitioned table, nbuckets must be at least equal to
713-
* num_partitions; were it less, keys with apparently different partition
714-
* numbers would map to the same bucket, breaking partition independence.
715-
* (Normally nbuckets will be much bigger; this is just a safety check.)
716-
*/
717-
while (nbuckets < hctl->num_partitions)
718-
nbuckets <<= 1;
769+
compute_buckets_and_segs(nelem, hctl->num_partitions, hctl->ssize,
770+
&nbuckets, &nsegs);
719771

720772
hctl->max_bucket = hctl->low_mask = nbuckets - 1;
721773
hctl->high_mask = (nbuckets << 1) - 1;
722774

723-
/*
724-
* Figure number of directory segments needed, round up to a power of 2
725-
*/
726-
nsegs = (nbuckets - 1) / hctl->ssize + 1;
727-
nsegs = next_pow2_int(nsegs);
728-
729775
/*
730776
* Make sure directory is big enough. If pre-allocated directory is too
731777
* small, choke (caller screwed up).
@@ -749,12 +795,22 @@ init_htab(HTAB *hashp, long nelem)
749795
}
750796

751797
/* Allocate initial segments */
798+
i = 0;
752799
for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
753800
{
754-
*segp = seg_alloc(hashp);
755-
if (*segp == NULL)
756-
return false;
801+
/* Assign initial segments, which are also pre-allocated */
802+
if (hashp->isshared)
803+
{
804+
*segp = (HASHSEGMENT) HASH_SEGMENT_PTR(hashp, i++);
805+
MemSet(*segp, 0, HASH_SEGMENT_SIZE(hashp));
806+
}
807+
else
808+
{
809+
*segp = seg_alloc(hashp);
810+
i++;
811+
}
757812
}
813+
Assert(i == nsegs);
758814

759815
/* Choose number of entries to allocate at a time */
760816
hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
@@ -847,16 +903,60 @@ hash_select_dirsize(long num_entries)
847903
}
848904

849905
/*
850-
* Compute the required initial memory allocation for a shared-memory
851-
* hashtable with the given parameters. We need space for the HASHHDR
852-
* and for the (non expansible) directory.
906+
* hash_get_shared_size -- determine memory needed for a new shared dynamic hash table
907+
*
908+
* info: hash table parameters
909+
* flags: bitmask indicating which parameters to take from *info
910+
* nelem: maximum number of elements expected
911+
*
912+
* Compute the required initial memory allocation for a hashtable with the given
913+
* parameters. We need space for the HASHHDR, for the directory, segments and
914+
* preallocated elements.
915+
*
916+
* For shared hash tables the directory size is non-expansible, and we preallocate
917+
* all elements (nelem).
853918
*/
854919
Size
855-
hash_get_shared_size(HASHCTL *info, int flags)
920+
hash_get_shared_size(const HASHCTL *info, int flags, long nelem)
856921
{
922+
int nbuckets;
923+
int nsegs;
924+
int num_partitions;
925+
long ssize;
926+
long dsize;
927+
Size elementSize = HASH_ELEMENT_SIZE(info);
928+
929+
#ifdef USE_ASSERT_CHECKING
930+
/* shared hash tables have a non-expansible directory */
931+
Assert(flags & HASH_SHARED_MEM);
857932
Assert(flags & HASH_DIRSIZE);
858933
Assert(info->dsize == info->max_dsize);
859-
return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
934+
#endif
935+
936+
dsize = info->dsize;
937+
938+
if (flags & HASH_SEGMENT)
939+
ssize = info->ssize;
940+
else
941+
ssize = DEF_SEGSIZE;
942+
943+
if (flags & HASH_PARTITION)
944+
{
945+
num_partitions = info->num_partitions;
946+
947+
/* Number of entries should be at least equal to the number of freelists */
948+
if (nelem < NUM_FREELISTS)
949+
nelem = NUM_FREELISTS;
950+
}
951+
else
952+
num_partitions = 0;
953+
954+
compute_buckets_and_segs(nelem, num_partitions, ssize,
955+
&nbuckets, &nsegs);
956+
957+
return sizeof(HASHHDR) + dsize * sizeof(HASHSEGMENT)
958+
+ sizeof(HASHBUCKET) * ssize * nsegs
959+
+ nelem * elementSize;
860960
}
861961

862962

@@ -1286,7 +1386,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx)
12861386
* Failing because the needed element is in a different freelist is
12871387
* not acceptable.
12881388
*/
1289-
if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
1389+
if ((newElement = element_alloc(hashp, hctl->nelem_alloc)) == NULL)
12901390
{
12911391
int borrow_from_idx;
12921392

@@ -1323,6 +1423,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx)
13231423
/* no elements available to borrow either, so out of memory */
13241424
return NULL;
13251425
}
1426+
element_add(hashp, newElement, hctl->nelem_alloc, freelist_idx);
13261427
}
13271428

13281429
/* remove entry from freelist, bump nentries */
@@ -1701,29 +1802,43 @@ seg_alloc(HTAB *hashp)
17011802
}
17021803

17031804
/*
1704-
* allocate some new elements and link them into the indicated free list
1805+
* allocate some new elements
17051806
*/
1706-
static bool
1707-
element_alloc(HTAB *hashp, int nelem, int freelist_idx)
1807+
static HASHELEMENT *
1808+
element_alloc(HTAB *hashp, int nelem)
17081809
{
17091810
HASHHDR *hctl = hashp->hctl;
17101811
Size elementSize;
1711-
HASHELEMENT *firstElement;
1712-
HASHELEMENT *tmpElement;
1713-
HASHELEMENT *prevElement;
1714-
int i;
1812+
HASHELEMENT *firstElement = NULL;
17151813

17161814
if (hashp->isfixed)
1717-
return false;
1815+
return NULL;
17181816

17191817
/* Each element has a HASHELEMENT header plus user data. */
1720-
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
1721-
1818+
elementSize = HASH_ELEMENT_SIZE(hctl);
17221819
CurrentDynaHashCxt = hashp->hcxt;
17231820
firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
17241821

17251822
if (!firstElement)
1726-
return false;
1823+
return NULL;
1824+
1825+
return firstElement;
1826+
}
1827+
1828+
/*
1829+
* link the elements allocated by element_alloc into the indicated free list
1830+
*/
1831+
static void
1832+
element_add(HTAB *hashp, HASHELEMENT *firstElement, int nelem, int freelist_idx)
1833+
{
1834+
HASHHDR *hctl = hashp->hctl;
1835+
Size elementSize;
1836+
HASHELEMENT *tmpElement;
1837+
HASHELEMENT *prevElement;
1838+
int i;
1839+
1840+
/* Each element has a HASHELEMENT header plus user data. */
1841+
elementSize = HASH_ELEMENT_SIZE(hctl);
17271842

17281843
/* prepare to link all the new entries into the freelist */
17291844
prevElement = NULL;
@@ -1745,8 +1860,6 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx)
17451860

17461861
if (IS_PARTITIONED(hctl))
17471862
SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1748-
1749-
return true;
17501863
}
17511864

17521865
/*
@@ -1958,3 +2071,34 @@ AtEOSubXact_HashTables(bool isCommit, int nestDepth)
19582071
}
19592072
}
19602073
}
2074+
2075+
/*
2076+
* Calculate the number of buckets and segments to store the given
2077+
* number of elements in a hash table. Segments contain buckets which
2078+
* in turn contain elements.
2079+
*/
2080+
static void
2081+
compute_buckets_and_segs(long nelem, long num_partitions, long ssize,
2082+
int *nbuckets, int *nsegments)
2083+
{
2084+
/*
2085+
* Allocate space for the next greater power of two number of buckets,
2086+
* assuming a desired maximum load factor of 1.
2087+
*/
2088+
*nbuckets = next_pow2_int(nelem);
2089+
2090+
/*
2091+
* In a partitioned table, nbuckets must be at least equal to
2092+
* num_partitions; were it less, keys with apparently different partition
2093+
* numbers would map to the same bucket, breaking partition independence.
2094+
* (Normally nbuckets will be much bigger; this is just a safety check.)
2095+
*/
2096+
while ((*nbuckets) < num_partitions)
2097+
(*nbuckets) <<= 1;
2098+
2099+
/*
2100+
* Figure number of directory segments needed, round up to a power of 2
2101+
*/
2102+
*nsegments = ((*nbuckets) - 1) / ssize + 1;
2103+
*nsegments = next_pow2_int(*nsegments);
2104+
}

src/include/utils/hsearch.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ extern void hash_seq_term(HASH_SEQ_STATUS *status);
151151
extern void hash_freeze(HTAB *hashp);
152152
extern Size hash_estimate_size(long num_entries, Size entrysize);
153153
extern long hash_select_dirsize(long num_entries);
154-
extern Size hash_get_shared_size(HASHCTL *info, int flags);
154+
extern Size hash_get_shared_size(const HASHCTL *info, int flags,
155+
long nelem);
155156
extern void AtEOXact_HashTables(bool isCommit);
156157
extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);
157158

0 commit comments

Comments
 (0)