Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 352d297

Browse files
committed
dshash: Add sequential scan support.
Add the ability to scan all entries of a dshash table sequentially. The interface is similar to, but a bit different from, both dynahash's and the simple dshash search functions. The most significant difference is that dshash's interface always needs a call to dshash_seq_term when a scan ends. Another is locking. Dshash holds a partition lock when returning an entry; dshash_seq_next() also holds a lock when returning an entry, but callers shouldn't release it, since the lock is essential to continue the scan. The seqscan interface allows entry deletion while a scan is in progress using dshash_delete_current(). Reviewed-By: Andres Freund <andres@anarazel.de> Author: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
1 parent adb5c28 commit 352d297

File tree

3 files changed

+186
-1
lines changed

3 files changed

+186
-1
lines changed

src/backend/lib/dshash.c

+162-1
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ struct dshash_table
127127
/* The argument is parenthesized so that expression arguments expand safely. */
#define NUM_SPLITS(size_log2) \
	((size_log2) - DSHASH_NUM_PARTITIONS_LOG2)
129129

130+
/*
 * Total number of buckets in a table whose size is 2^size_log2.  The shift
 * is performed on a size_t value.
 */
131+
#define NUM_BUCKETS(size_log2) \
132+
(((size_t) 1) << (size_log2))
133+
130134
/*
 * How many buckets are there in each partition at a given size?  This is
 * 2^NUM_SPLITS(size_log2).
 */
131135
#define BUCKETS_PER_PARTITION(size_log2) \
132136
(((size_t) 1) << NUM_SPLITS(size_log2))
@@ -153,6 +157,10 @@ struct dshash_table
153157
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
154158
((partition) << NUM_SPLITS(size_log2))
155159

160+
/*
 * Choose partition based on bucket index: the low NUM_SPLITS(size_log2) bits
 * of the index are shifted out.  This is the reverse of the left shift done
 * by BUCKET_INDEX_FOR_PARTITION.
 */
161+
#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2) \
162+
((bucket_idx) >> NUM_SPLITS(size_log2))
163+
156164
/* The head of the active bucket for a given hash value (lvalue). */
157165
#define BUCKET_FOR_HASH(hash_table, hash) \
158166
(hash_table->buckets[ \
@@ -324,7 +332,7 @@ dshash_destroy(dshash_table *hash_table)
324332
ensure_valid_bucket_pointers(hash_table);
325333

326334
/* Free all the entries. */
327-
size = ((size_t) 1) << hash_table->size_log2;
335+
size = NUM_BUCKETS(hash_table->size_log2);
328336
for (i = 0; i < size; ++i)
329337
{
330338
dsa_pointer item_pointer = hash_table->buckets[i];
@@ -592,6 +600,159 @@ dshash_memhash(const void *v, size_t size, void *arg)
592600
return tag_hash(v, size);
593601
}
594602

603+
/*
604+
* dshash_seq_init/_next/_term
605+
* Sequentially scan through dshash table and return all the
606+
* elements one by one, return NULL when no more.
607+
*
608+
* dshash_seq_term should always be called when a scan finished.
609+
* The caller may delete returned elements midst of a scan by using
610+
* dshash_delete_current(). exclusive must be true to delete elements.
611+
*/
612+
void
613+
dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
614+
bool exclusive)
615+
{
616+
status->hash_table = hash_table;
617+
status->curbucket = 0;
618+
status->nbuckets = 0;
619+
status->curitem = NULL;
620+
status->pnextitem = InvalidDsaPointer;
621+
status->curpartition = -1;
622+
status->exclusive = exclusive;
623+
}
624+
625+
/*
626+
* Returns the next element.
627+
*
628+
* Returned elements are locked and the caller must not explicitly release
629+
* it. It is released at the next call to dshash_next().
630+
*/
631+
void *
632+
dshash_seq_next(dshash_seq_status *status)
633+
{
634+
dsa_pointer next_item_pointer;
635+
636+
if (status->curitem == NULL)
637+
{
638+
int partition;
639+
640+
Assert(status->curbucket == 0);
641+
Assert(!status->hash_table->find_locked);
642+
643+
/* first shot. grab the first item. */
644+
partition =
645+
PARTITION_FOR_BUCKET_INDEX(status->curbucket,
646+
status->hash_table->size_log2);
647+
LWLockAcquire(PARTITION_LOCK(status->hash_table, partition),
648+
status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
649+
status->curpartition = partition;
650+
651+
/* resize doesn't happen from now until seq scan ends */
652+
status->nbuckets =
653+
NUM_BUCKETS(status->hash_table->control->size_log2);
654+
ensure_valid_bucket_pointers(status->hash_table);
655+
656+
next_item_pointer = status->hash_table->buckets[status->curbucket];
657+
}
658+
else
659+
next_item_pointer = status->pnextitem;
660+
661+
Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
662+
status->curpartition),
663+
status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
664+
665+
/* Move to the next bucket if we finished the current bucket */
666+
while (!DsaPointerIsValid(next_item_pointer))
667+
{
668+
int next_partition;
669+
670+
if (++status->curbucket >= status->nbuckets)
671+
{
672+
/* all buckets have been scanned. finish. */
673+
return NULL;
674+
}
675+
676+
/* Check if move to the next partition */
677+
next_partition =
678+
PARTITION_FOR_BUCKET_INDEX(status->curbucket,
679+
status->hash_table->size_log2);
680+
681+
if (status->curpartition != next_partition)
682+
{
683+
/*
684+
* Move to the next partition. Lock the next partition then
685+
* release the current, not in the reverse order to avoid
686+
* concurrent resizing. Avoid dead lock by taking lock in the
687+
* same order with resize().
688+
*/
689+
LWLockAcquire(PARTITION_LOCK(status->hash_table,
690+
next_partition),
691+
status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
692+
LWLockRelease(PARTITION_LOCK(status->hash_table,
693+
status->curpartition));
694+
status->curpartition = next_partition;
695+
}
696+
697+
next_item_pointer = status->hash_table->buckets[status->curbucket];
698+
}
699+
700+
status->curitem =
701+
dsa_get_address(status->hash_table->area, next_item_pointer);
702+
status->hash_table->find_locked = true;
703+
status->hash_table->find_exclusively_locked = status->exclusive;
704+
705+
/*
706+
* The caller may delete the item. Store the next item in case of
707+
* deletion.
708+
*/
709+
status->pnextitem = status->curitem->next;
710+
711+
return ENTRY_FROM_ITEM(status->curitem);
712+
}
713+
714+
/*
715+
* Terminates the seqscan and release all locks.
716+
*
717+
* Should be always called when finishing or exiting a seqscan.
718+
*/
719+
void
720+
dshash_seq_term(dshash_seq_status *status)
721+
{
722+
status->hash_table->find_locked = false;
723+
status->hash_table->find_exclusively_locked = false;
724+
725+
if (status->curpartition >= 0)
726+
LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
727+
}
728+
729+
/* Remove the current entry while a seq scan. */
730+
void
731+
dshash_delete_current(dshash_seq_status *status)
732+
{
733+
dshash_table *hash_table = status->hash_table;
734+
dshash_table_item *item = status->curitem;
735+
size_t partition PG_USED_FOR_ASSERTS_ONLY;
736+
737+
partition = PARTITION_FOR_HASH(item->hash);
738+
739+
Assert(status->exclusive);
740+
Assert(hash_table->control->magic == DSHASH_MAGIC);
741+
Assert(hash_table->find_locked);
742+
Assert(hash_table->find_exclusively_locked);
743+
Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
744+
LW_EXCLUSIVE));
745+
746+
delete_item(hash_table, item);
747+
}
748+
749+
/* Get the current entry while a seq scan. */
750+
void *
751+
dshash_get_current(dshash_seq_status *status)
752+
{
753+
return ENTRY_FROM_ITEM(status->curitem);
754+
}
755+
595756
/*
596757
* Print debugging information about the internal state of the hash table to
597758
* stderr. The caller must hold no partition locks.

src/include/lib/dshash.h

+23
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,21 @@ typedef struct dshash_parameters
5959
struct dshash_table_item;
6060
typedef struct dshash_table_item dshash_table_item;
6161

62+
/*
 * Sequential scan state.  The definition is exposed only so that callers
 * know how much storage to provide; they should treat it as an opaque type
 * and manipulate it solely through the dshash_seq_* functions.
 */
66+
typedef struct dshash_seq_status
67+
{
68+
dshash_table *hash_table; /* dshash table being scanned */
69+
int curbucket; /* index of the bucket the scan is at */
70+
int nbuckets; /* total number of buckets, read when the scan takes its first lock */
71+
dshash_table_item *curitem; /* item most recently returned, NULL before the first one */
72+
dsa_pointer pnextitem; /* dsa-pointer to the item after curitem, saved in case curitem is deleted */
73+
int curpartition; /* partition whose lock the scan holds, -1 if none yet */
74+
bool exclusive; /* true: take LW_EXCLUSIVE partition locks; false: LW_SHARED */
75+
} dshash_seq_status;
76+
6277
/* Creating, sharing and destroying from hash tables. */
6378
extern dshash_table *dshash_create(dsa_area *area,
6479
const dshash_parameters *params,
@@ -80,6 +95,14 @@ extern bool dshash_delete_key(dshash_table *hash_table, const void *key);
8095
extern void dshash_delete_entry(dshash_table *hash_table, void *entry);
8196
extern void dshash_release_lock(dshash_table *hash_table, void *entry);
8297

98+
/*
 * seq scan support
 *
 * dshash_seq_init() prepares a scan, dshash_seq_next() returns one entry per
 * call and NULL when there are no more, and dshash_seq_term() must always be
 * called to end the scan.  dshash_delete_current() removes the entry the scan
 * is positioned on (the scan must be exclusive); dshash_get_current() returns
 * that entry.
 */
99+
extern void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
100+
bool exclusive);
101+
extern void *dshash_seq_next(dshash_seq_status *status);
102+
extern void dshash_seq_term(dshash_seq_status *status);
103+
extern void dshash_delete_current(dshash_seq_status *status);
104+
extern void *dshash_get_current(dshash_seq_status *status);
105+
83106
/* Convenience hash and compare functions wrapping memcmp and tag_hash. */
84107
extern int dshash_memcmp(const void *a, const void *b, size_t size, void *arg);
85108
extern dshash_hash dshash_memhash(const void *v, size_t size, void *arg);

src/tools/pgindent/typedefs.list

+1
Original file line numberDiff line numberDiff line change
@@ -3103,6 +3103,7 @@ dshash_hash
31033103
dshash_hash_function
31043104
dshash_parameters
31053105
dshash_partition
3106+
dshash_seq_status
31063107
dshash_table
31073108
dshash_table_control
31083109
dshash_table_handle

0 commit comments

Comments
 (0)