@@ -260,12 +260,39 @@ static long hash_accesses,
260
260
hash_expansions ;
261
261
#endif
262
262
263
+ /* access to parts of the hash table, allocated as a single chunk */
264
+ #define HASH_DIRECTORY_PTR (hashp ) \
265
+ (((char *) (hashp)->hctl) + sizeof(HASHHDR))
266
+
267
+ #define HASH_SEGMENT_OFFSET (hctl , idx ) \
268
+ (sizeof(HASHHDR) + \
269
+ ((hctl)->dsize * sizeof(HASHSEGMENT)) + \
270
+ ((hctl)->ssize * (idx) * sizeof(HASHBUCKET)))
271
+
272
+ #define HASH_SEGMENT_PTR (hashp , idx ) \
273
+ ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, (idx)))
274
+
275
+ #define HASH_SEGMENT_SIZE (hashp ) \
276
+ ((hashp)->ssize * sizeof(HASHBUCKET))
277
+
278
+ #define HASH_ELEMENTS_PTR (hashp , nsegs ) \
279
+ ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, nsegs))
280
+
281
+ /* Each element has a HASHELEMENT header plus user data. */
282
+ #define HASH_ELEMENT_SIZE (hctl ) \
283
+ (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN((hctl)->entrysize))
284
+
285
+ #define HASH_ELEMENT_NEXT (hctl , num , ptr ) \
286
+ ((char *) (ptr) + ((num) * HASH_ELEMENT_SIZE(hctl)))
287
+
263
288
/*
264
289
* Private function prototypes
265
290
*/
266
291
static void * DynaHashAlloc (Size size );
267
292
static HASHSEGMENT seg_alloc (HTAB * hashp );
268
- static bool element_alloc (HTAB * hashp , int nelem , int freelist_idx );
293
+ static HASHELEMENT * element_alloc (HTAB * hashp , int nelem );
294
+ static void element_add (HTAB * hashp , HASHELEMENT * firstElement ,
295
+ int nelem , int freelist_idx );
269
296
static bool dir_realloc (HTAB * hashp );
270
297
static bool expand_table (HTAB * hashp );
271
298
static HASHBUCKET get_hash_entry (HTAB * hashp , int freelist_idx );
@@ -280,6 +307,9 @@ static int next_pow2_int(long num);
280
307
static void register_seq_scan (HTAB * hashp );
281
308
static void deregister_seq_scan (HTAB * hashp );
282
309
static bool has_seq_scans (HTAB * hashp );
310
+ static void compute_buckets_and_segs (long nelem , long num_partitions ,
311
+ long ssize ,
312
+ int * nbuckets , int * nsegments );
283
313
284
314
285
315
/*
@@ -569,12 +599,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
569
599
elog (ERROR , "failed to initialize hash table \"%s\"" , hashp -> tabname );
570
600
571
601
/*
602
+ * For a private hash table, preallocate the requested number of elements
603
+ * if it's less than our chosen nelem_alloc. This avoids wasting space if
604
+ * the caller correctly estimates a small table size.
605
+ *
572
606
* For a shared hash table, preallocate the requested number of elements.
573
607
* This reduces problems with run-time out-of-shared-memory conditions.
574
- *
575
- * For a non-shared hash table, preallocate the requested number of
576
- * elements if it's less than our chosen nelem_alloc. This avoids wasting
577
- * space if the caller correctly estimates a small table size.
578
608
*/
579
609
if ((flags & HASH_SHARED_MEM ) ||
580
610
nelem < hctl -> nelem_alloc )
@@ -583,6 +613,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
583
613
freelist_partitions ,
584
614
nelem_alloc ,
585
615
nelem_alloc_first ;
616
+ void * ptr = NULL ;
586
617
587
618
/*
588
619
* If hash table is partitioned, give each freelist an equal share of
@@ -607,14 +638,42 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
607
638
else
608
639
nelem_alloc_first = nelem_alloc ;
609
640
641
+ /*
642
+ * For a shared hash table, calculate the offset at which to find the
643
+ * first partition of elements. We have to skip space for the header,
644
+ * segments and buckets.
645
+ */
646
+ if (hashp -> isshared )
647
+ ptr = HASH_ELEMENTS_PTR (hashp , hctl -> nsegs );
648
+
610
649
for (i = 0 ; i < freelist_partitions ; i ++ )
611
650
{
612
651
int temp = (i == 0 ) ? nelem_alloc_first : nelem_alloc ;
613
652
614
- if (!element_alloc (hashp , temp , i ))
615
- ereport (ERROR ,
616
- (errcode (ERRCODE_OUT_OF_MEMORY ),
617
- errmsg ("out of memory" )));
653
+ /*
654
+ * Assign the correct location of each partition within a
655
+ * pre-allocated buffer.
656
+ *
657
+ * Actual memory allocation happens in ShmemInitHash for shared
658
+ * hash tables.
659
+ *
660
+ * We just need to split that allocation into per-batch freelists.
661
+ */
662
+ if (hashp -> isshared )
663
+ {
664
+ element_add (hashp , (HASHELEMENT * ) ptr , temp , i );
665
+ ptr = HASH_ELEMENT_NEXT (hctl , temp , ptr );
666
+ }
667
+ else
668
+ {
669
+ HASHELEMENT * firstElement = element_alloc (hashp , temp );
670
+
671
+ if (!firstElement )
672
+ ereport (ERROR ,
673
+ (errcode (ERRCODE_OUT_OF_MEMORY ),
674
+ errmsg ("out of memory" )));
675
+ element_add (hashp , firstElement , temp , i );
676
+ }
618
677
}
619
678
}
620
679
@@ -703,29 +762,16 @@ init_htab(HTAB *hashp, long nelem)
703
762
SpinLockInit (& (hctl -> freeList [i ].mutex ));
704
763
705
764
/*
706
- * Allocate space for the next greater power of two number of buckets,
707
- * assuming a desired maximum load factor of 1.
765
+ * We've already calculated these parameters when we calculated how much
766
+ * space to allocate in hash_get_shared_size(). Be careful to keep these
767
+ * two places in sync, so that we get the same parameters.
708
768
*/
709
- nbuckets = next_pow2_int (nelem );
710
-
711
- /*
712
- * In a partitioned table, nbuckets must be at least equal to
713
- * num_partitions; were it less, keys with apparently different partition
714
- * numbers would map to the same bucket, breaking partition independence.
715
- * (Normally nbuckets will be much bigger; this is just a safety check.)
716
- */
717
- while (nbuckets < hctl -> num_partitions )
718
- nbuckets <<= 1 ;
769
+ compute_buckets_and_segs (nelem , hctl -> num_partitions , hctl -> ssize ,
770
+ & nbuckets , & nsegs );
719
771
720
772
hctl -> max_bucket = hctl -> low_mask = nbuckets - 1 ;
721
773
hctl -> high_mask = (nbuckets << 1 ) - 1 ;
722
774
723
- /*
724
- * Figure number of directory segments needed, round up to a power of 2
725
- */
726
- nsegs = (nbuckets - 1 ) / hctl -> ssize + 1 ;
727
- nsegs = next_pow2_int (nsegs );
728
-
729
775
/*
730
776
* Make sure directory is big enough. If pre-allocated directory is too
731
777
* small, choke (caller screwed up).
@@ -749,12 +795,22 @@ init_htab(HTAB *hashp, long nelem)
749
795
}
750
796
751
797
/* Allocate initial segments */
798
+ i = 0 ;
752
799
for (segp = hashp -> dir ; hctl -> nsegs < nsegs ; hctl -> nsegs ++ , segp ++ )
753
800
{
754
- * segp = seg_alloc (hashp );
755
- if (* segp == NULL )
756
- return false;
801
+ /* Assign initial segments, which are also pre-allocated */
802
+ if (hashp -> isshared )
803
+ {
804
+ * segp = (HASHSEGMENT ) HASH_SEGMENT_PTR (hashp , i ++ );
805
+ MemSet (* segp , 0 , HASH_SEGMENT_SIZE (hashp ));
806
+ }
807
+ else
808
+ {
809
+ * segp = seg_alloc (hashp );
810
+ i ++ ;
811
+ }
757
812
}
813
+ Assert (i == nsegs );
758
814
759
815
/* Choose number of entries to allocate at a time */
760
816
hctl -> nelem_alloc = choose_nelem_alloc (hctl -> entrysize );
@@ -847,16 +903,60 @@ hash_select_dirsize(long num_entries)
847
903
}
848
904
849
905
/*
850
- * Compute the required initial memory allocation for a shared-memory
851
- * hashtable with the given parameters. We need space for the HASHHDR
852
- * and for the (non expansible) directory.
906
+ * hash_get_shared_size -- determine memory needed for a new shared dynamic hash table
907
+ *
908
+ * info: hash table parameters
909
+ * flags: bitmask indicating which parameters to take from *info
910
+ * nelem: maximum number of elements expected
911
+ *
912
+ * Compute the required initial memory allocation for a hashtable with the given
913
+ * parameters. We need space for the HASHHDR, for the directory, segments and
914
+ * preallocated elements.
915
+ *
916
+ * For shared hash tables the directory size is non-expansive, and we preallocate
917
+ * all elements (nelem).
853
918
*/
854
919
Size
855
- hash_get_shared_size (HASHCTL * info , int flags )
920
+ hash_get_shared_size (const HASHCTL * info , int flags , long nelem )
856
921
{
922
+ int nbuckets ;
923
+ int nsegs ;
924
+ int num_partitions ;
925
+ long ssize ;
926
+ long dsize ;
927
+ Size elementSize = HASH_ELEMENT_SIZE (info );
928
+
929
+ #ifdef USE_ASSERT_CHECKING
930
+ /* shared hash tables have non-expansible directory */
931
+ Assert (flags & HASH_SHARED_MEM );
857
932
Assert (flags & HASH_DIRSIZE );
858
933
Assert (info -> dsize == info -> max_dsize );
859
- return sizeof (HASHHDR ) + info -> dsize * sizeof (HASHSEGMENT );
934
+ #endif
935
+
936
+ dsize = info -> dsize ;
937
+
938
+ if (flags & HASH_SEGMENT )
939
+ ssize = info -> ssize ;
940
+ else
941
+ ssize = DEF_SEGSIZE ;
942
+
943
+ if (flags & HASH_PARTITION )
944
+ {
945
+ num_partitions = info -> num_partitions ;
946
+
947
+ /* Number of entries should be at least equal to the freelists */
948
+ if (nelem < NUM_FREELISTS )
949
+ nelem = NUM_FREELISTS ;
950
+ }
951
+ else
952
+ num_partitions = 0 ;
953
+
954
+ compute_buckets_and_segs (nelem , num_partitions , ssize ,
955
+ & nbuckets , & nsegs );
956
+
957
+ return sizeof (HASHHDR ) + dsize * sizeof (HASHSEGMENT )
958
+ + sizeof (HASHBUCKET ) * ssize * nsegs
959
+ + nelem * elementSize ;
860
960
}
861
961
862
962
@@ -1286,7 +1386,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx)
1286
1386
* Failing because the needed element is in a different freelist is
1287
1387
* not acceptable.
1288
1388
*/
1289
- if (! element_alloc (hashp , hctl -> nelem_alloc , freelist_idx ) )
1389
+ if (( newElement = element_alloc (hashp , hctl -> nelem_alloc )) == NULL )
1290
1390
{
1291
1391
int borrow_from_idx ;
1292
1392
@@ -1323,6 +1423,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx)
1323
1423
/* no elements available to borrow either, so out of memory */
1324
1424
return NULL ;
1325
1425
}
1426
+ element_add (hashp , newElement , hctl -> nelem_alloc , freelist_idx );
1326
1427
}
1327
1428
1328
1429
/* remove entry from freelist, bump nentries */
@@ -1701,29 +1802,43 @@ seg_alloc(HTAB *hashp)
1701
1802
}
1702
1803
1703
1804
/*
1704
- * allocate some new elements and link them into the indicated free list
1805
+ * allocate some new elements
1705
1806
*/
1706
- static bool
1707
- element_alloc (HTAB * hashp , int nelem , int freelist_idx )
1807
+ static HASHELEMENT *
1808
+ element_alloc (HTAB * hashp , int nelem )
1708
1809
{
1709
1810
HASHHDR * hctl = hashp -> hctl ;
1710
1811
Size elementSize ;
1711
- HASHELEMENT * firstElement ;
1712
- HASHELEMENT * tmpElement ;
1713
- HASHELEMENT * prevElement ;
1714
- int i ;
1812
+ HASHELEMENT * firstElement = NULL ;
1715
1813
1716
1814
if (hashp -> isfixed )
1717
- return false ;
1815
+ return NULL ;
1718
1816
1719
1817
/* Each element has a HASHELEMENT header plus user data. */
1720
- elementSize = MAXALIGN (sizeof (HASHELEMENT )) + MAXALIGN (hctl -> entrysize );
1721
-
1818
+ elementSize = HASH_ELEMENT_SIZE (hctl );
1722
1819
CurrentDynaHashCxt = hashp -> hcxt ;
1723
1820
firstElement = (HASHELEMENT * ) hashp -> alloc (nelem * elementSize );
1724
1821
1725
1822
if (!firstElement )
1726
- return false;
1823
+ return NULL ;
1824
+
1825
+ return firstElement ;
1826
+ }
1827
+
1828
+ /*
1829
+ * link the elements allocated by element_alloc into the indicated free list
1830
+ */
1831
+ static void
1832
+ element_add (HTAB * hashp , HASHELEMENT * firstElement , int nelem , int freelist_idx )
1833
+ {
1834
+ HASHHDR * hctl = hashp -> hctl ;
1835
+ Size elementSize ;
1836
+ HASHELEMENT * tmpElement ;
1837
+ HASHELEMENT * prevElement ;
1838
+ int i ;
1839
+
1840
+ /* Each element has a HASHELEMENT header plus user data. */
1841
+ elementSize = HASH_ELEMENT_SIZE (hctl );
1727
1842
1728
1843
/* prepare to link all the new entries into the freelist */
1729
1844
prevElement = NULL ;
@@ -1745,8 +1860,6 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx)
1745
1860
1746
1861
if (IS_PARTITIONED (hctl ))
1747
1862
SpinLockRelease (& hctl -> freeList [freelist_idx ].mutex );
1748
-
1749
- return true;
1750
1863
}
1751
1864
1752
1865
/*
@@ -1958,3 +2071,34 @@ AtEOSubXact_HashTables(bool isCommit, int nestDepth)
1958
2071
}
1959
2072
}
1960
2073
}
2074
+
2075
+ /*
2076
+ * Calculate the number of buckets and segments to store the given
2077
+ * number of elements in a hash table. Segments contain buckets which
2078
+ * in turn contain elements.
2079
+ */
2080
+ static void
2081
+ compute_buckets_and_segs (long nelem , long num_partitions , long ssize ,
2082
+ int * nbuckets , int * nsegments )
2083
+ {
2084
+ /*
2085
+ * Allocate space for the next greater power of two number of buckets,
2086
+ * assuming a desired maximum load factor of 1.
2087
+ */
2088
+ * nbuckets = next_pow2_int (nelem );
2089
+
2090
+ /*
2091
+ * In a partitioned table, nbuckets must be at least equal to
2092
+ * num_partitions; were it less, keys with apparently different partition
2093
+ * numbers would map to the same bucket, breaking partition independence.
2094
+ * (Normally nbuckets will be much bigger; this is just a safety check.)
2095
+ */
2096
+ while ((* nbuckets ) < num_partitions )
2097
+ (* nbuckets ) <<= 1 ;
2098
+
2099
+ /*
2100
+ * Figure number of directory segments needed, round up to a power of 2
2101
+ */
2102
+ * nsegments = ((* nbuckets ) - 1 ) / ssize + 1 ;
2103
+ * nsegments = next_pow2_int (* nsegments );
2104
+ }
0 commit comments