diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index c9ae3b45b76b..46490291ff3f 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -74,6 +74,7 @@ #include "storage/shmem.h" #include "storage/spin.h" #include "utils/builtins.h" +#include "utils/dynahash.h" static void *ShmemAllocRaw(Size size, Size *allocated_size); @@ -351,7 +352,8 @@ ShmemInitHash(const char *name, /* table string name for shmem index */ /* look it up in the shmem index */ location = ShmemInitStruct(name, - hash_get_shared_size(infoP, hash_flags), + hash_get_shared_size(infoP, hash_flags, + init_size), &found); /* diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 1ad155d446e5..818dbdc61255 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -260,12 +260,39 @@ static long hash_accesses, hash_expansions; #endif +/* access to parts of the hash table, allocated as a single chunk */ +#define HASH_DIRECTORY_PTR(hashp) \ + (((char *) (hashp)->hctl) + sizeof(HASHHDR)) + +#define HASH_SEGMENT_OFFSET(hctl, idx) \ + (sizeof(HASHHDR) + \ + ((hctl)->dsize * sizeof(HASHSEGMENT)) + \ + ((hctl)->ssize * (idx) * sizeof(HASHBUCKET))) + +#define HASH_SEGMENT_PTR(hashp, idx) \ + ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, (idx))) + +#define HASH_SEGMENT_SIZE(hashp) \ + ((hashp)->ssize * sizeof(HASHBUCKET)) + +#define HASH_ELEMENTS_PTR(hashp, nsegs) \ + ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, nsegs)) + +/* Each element has a HASHELEMENT header plus user data. */ +#define HASH_ELEMENT_SIZE(hctl) \ + (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN((hctl)->entrysize)) + +#define HASH_ELEMENT_NEXT(hctl, num, ptr) \ + ((char *) (ptr) + ((num) * HASH_ELEMENT_SIZE(hctl))) + /* * Private function prototypes */ static void *DynaHashAlloc(Size size); static HASHSEGMENT seg_alloc(HTAB *hashp); -static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx); +static HASHELEMENT *element_alloc(HTAB *hashp, int nelem); +static void element_add(HTAB *hashp, HASHELEMENT *firstElement, + int nelem, int freelist_idx); static bool dir_realloc(HTAB *hashp); static bool expand_table(HTAB *hashp); static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx); @@ -280,6 +307,9 @@ static int next_pow2_int(long num); static void register_seq_scan(HTAB *hashp); static void deregister_seq_scan(HTAB *hashp); static bool has_seq_scans(HTAB *hashp); +static void compute_buckets_and_segs(long nelem, long num_partitions, + long ssize, + int *nbuckets, int *nsegments); /* @@ -569,12 +599,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname); /* + * For a private hash table, preallocate the requested number of elements + * if it's less than our chosen nelem_alloc. This avoids wasting space if + * the caller correctly estimates a small table size. + * * For a shared hash table, preallocate the requested number of elements. * This reduces problems with run-time out-of-shared-memory conditions. - * - * For a non-shared hash table, preallocate the requested number of - * elements if it's less than our chosen nelem_alloc. This avoids wasting - * space if the caller correctly estimates a small table size. */ if ((flags & HASH_SHARED_MEM) || nelem < hctl->nelem_alloc) @@ -583,6 +613,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) freelist_partitions, nelem_alloc, nelem_alloc_first; + void *ptr = NULL; /* * If hash table is partitioned, give each freelist an equal share of @@ -607,14 +638,42 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) else nelem_alloc_first = nelem_alloc; + /* + * For a shared hash table, calculate the offset at which to find the + * first partition of elements. We have to skip space for the header, + * segments and buckets. + */ + if (hashp->isshared) + ptr = HASH_ELEMENTS_PTR(hashp, hctl->nsegs); + for (i = 0; i < freelist_partitions; i++) { int temp = (i == 0) ? nelem_alloc_first : nelem_alloc; - if (!element_alloc(hashp, temp, i)) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); + /* + * Assign the correct location of each parition within a + * pre-allocated buffer. + * + * Actual memory allocation happens in ShmemInitHash for shared + * hash tables. + * + * We just need to split that allocation into per-batch freelists. + */ + if (hashp->isshared) + { + element_add(hashp, (HASHELEMENT *) ptr, temp, i); + ptr = HASH_ELEMENT_NEXT(hctl, temp, ptr); + } + else + { + HASHELEMENT *firstElement = element_alloc(hashp, temp); + + if (!firstElement) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + element_add(hashp, firstElement, temp, i); + } } } @@ -703,29 +762,16 @@ init_htab(HTAB *hashp, long nelem) SpinLockInit(&(hctl->freeList[i].mutex)); /* - * Allocate space for the next greater power of two number of buckets, - * assuming a desired maximum load factor of 1. + * We've already calculated these parameters when we calculated how much + * space to allocate in hash_get_shared_size(). Be careful to keep these + * two places in sync, so that we get the same parameters. */ - nbuckets = next_pow2_int(nelem); - - /* - * In a partitioned table, nbuckets must be at least equal to - * num_partitions; were it less, keys with apparently different partition - * numbers would map to the same bucket, breaking partition independence. - * (Normally nbuckets will be much bigger; this is just a safety check.) - */ - while (nbuckets < hctl->num_partitions) - nbuckets <<= 1; + compute_buckets_and_segs(nelem, hctl->num_partitions, hctl->ssize, + &nbuckets, &nsegs); hctl->max_bucket = hctl->low_mask = nbuckets - 1; hctl->high_mask = (nbuckets << 1) - 1; - /* - * Figure number of directory segments needed, round up to a power of 2 - */ - nsegs = (nbuckets - 1) / hctl->ssize + 1; - nsegs = next_pow2_int(nsegs); - /* * Make sure directory is big enough. If pre-allocated directory is too * small, choke (caller screwed up). @@ -749,12 +795,22 @@ init_htab(HTAB *hashp, long nelem) } /* Allocate initial segments */ + i = 0; for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++) { - *segp = seg_alloc(hashp); - if (*segp == NULL) - return false; + /* Assign initial segments, which are also pre-allocated */ + if (hashp->isshared) + { + *segp = (HASHSEGMENT) HASH_SEGMENT_PTR(hashp, i++); + MemSet(*segp, 0, HASH_SEGMENT_SIZE(hashp)); + } + else + { + *segp = seg_alloc(hashp); + i++; + } } + Assert(i == nsegs); /* Choose number of entries to allocate at a time */ hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize); @@ -847,16 +903,60 @@ hash_select_dirsize(long num_entries) } /* - * Compute the required initial memory allocation for a shared-memory - * hashtable with the given parameters. We need space for the HASHHDR - * and for the (non expansible) directory. + * hash_get_shared_size -- determine memory needed for a new shared dynamic hash table + * + * info: hash table parameters + * flags: bitmask indicating which parameters to take from *info + * nelem: maximum number of elements expected + * + * Compute the required initial memory allocation for a hashtable with the given + * parameters. We need space for the HASHHDR, for the directory, segments and + * preallocated elements. + * + * For shared hash tables the directory size is non-expansive, and we preallocate + * all elements (nelem). */ Size -hash_get_shared_size(HASHCTL *info, int flags) +hash_get_shared_size(const HASHCTL *info, int flags, long nelem) { + int nbuckets; + int nsegs; + int num_partitions; + long ssize; + long dsize; + Size elementSize = HASH_ELEMENT_SIZE(info); + +#ifdef USE_ASSERT_CHECKING + /* shared hash tables have non-expansive directory */ + Assert(flags & HASH_SHARED_MEM); Assert(flags & HASH_DIRSIZE); Assert(info->dsize == info->max_dsize); - return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT); +#endif + + dsize = info->dsize; + + if (flags & HASH_SEGMENT) + ssize = info->ssize; + else + ssize = DEF_SEGSIZE; + + if (flags & HASH_PARTITION) + { + num_partitions = info->num_partitions; + + /* Number of entries should be atleast equal to the freelists */ + if (nelem < NUM_FREELISTS) + nelem = NUM_FREELISTS; + } + else + num_partitions = 0; + + compute_buckets_and_segs(nelem, num_partitions, ssize, + &nbuckets, &nsegs); + + return sizeof(HASHHDR) + dsize * sizeof(HASHSEGMENT) + + sizeof(HASHBUCKET) * ssize * nsegs + + nelem * elementSize; } @@ -1286,7 +1386,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx) * Failing because the needed element is in a different freelist is * not acceptable. */ - if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx)) + if ((newElement = element_alloc(hashp, hctl->nelem_alloc)) == NULL) { int borrow_from_idx; @@ -1323,6 +1423,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx) /* no elements available to borrow either, so out of memory */ return NULL; } + element_add(hashp, newElement, hctl->nelem_alloc, freelist_idx); } /* remove entry from freelist, bump nentries */ @@ -1701,29 +1802,43 @@ seg_alloc(HTAB *hashp) } /* - * allocate some new elements and link them into the indicated free list + * allocate some new elements */ -static bool -element_alloc(HTAB *hashp, int nelem, int freelist_idx) +static HASHELEMENT * +element_alloc(HTAB *hashp, int nelem) { HASHHDR *hctl = hashp->hctl; Size elementSize; - HASHELEMENT *firstElement; - HASHELEMENT *tmpElement; - HASHELEMENT *prevElement; - int i; + HASHELEMENT *firstElement = NULL; if (hashp->isfixed) - return false; + return NULL; /* Each element has a HASHELEMENT header plus user data. */ - elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize); - + elementSize = HASH_ELEMENT_SIZE(hctl); CurrentDynaHashCxt = hashp->hcxt; firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize); if (!firstElement) - return false; + return NULL; + + return firstElement; +} + +/* + * link the elements allocated by element_alloc into the indicated free list + */ +static void +element_add(HTAB *hashp, HASHELEMENT *firstElement, int nelem, int freelist_idx) +{ + HASHHDR *hctl = hashp->hctl; + Size elementSize; + HASHELEMENT *tmpElement; + HASHELEMENT *prevElement; + int i; + + /* Each element has a HASHELEMENT header plus user data. */ + elementSize = HASH_ELEMENT_SIZE(hctl); /* prepare to link all the new entries into the freelist */ prevElement = NULL; @@ -1745,8 +1860,6 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx) if (IS_PARTITIONED(hctl)) SpinLockRelease(&hctl->freeList[freelist_idx].mutex); - - return true; } /* @@ -1958,3 +2071,34 @@ AtEOSubXact_HashTables(bool isCommit, int nestDepth) } } } + +/* + * Calculate the number of buckets and segments to store the given + * number of elements in a hash table. Segments contain buckets which + * in turn contain elements. + */ +static void +compute_buckets_and_segs(long nelem, long num_partitions, long ssize, + int *nbuckets, int *nsegments) +{ + /* + * Allocate space for the next greater power of two number of buckets, + * assuming a desired maximum load factor of 1. + */ + *nbuckets = next_pow2_int(nelem); + + /* + * In a partitioned table, nbuckets must be at least equal to + * num_partitions; were it less, keys with apparently different partition + * numbers would map to the same bucket, breaking partition independence. + * (Normally nbuckets will be much bigger; this is just a safety check.) + */ + while ((*nbuckets) < num_partitions) + (*nbuckets) <<= 1; + + /* + * Figure number of directory segments needed, round up to a power of 2 + */ + *nsegments = ((*nbuckets) - 1) / ssize + 1; + *nsegments = next_pow2_int(*nsegments); +} diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index 932cc4f34d90..5cefda1c22c7 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -151,7 +151,8 @@ extern void hash_seq_term(HASH_SEQ_STATUS *status); extern void hash_freeze(HTAB *hashp); extern Size hash_estimate_size(long num_entries, Size entrysize); extern long hash_select_dirsize(long num_entries); -extern Size hash_get_shared_size(HASHCTL *info, int flags); +extern Size hash_get_shared_size(const HASHCTL *info, int flags, + long nelem); extern void AtEOXact_HashTables(bool isCommit); extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);