Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 00b4146

Browse files
committed
Require empty Bitmapsets to be represented as NULL.
When I designed the Bitmapset module, I set things up so that an empty Bitmapset could be represented either by a NULL pointer, or by an allocated object all of whose bits are zero. I've recently come to the conclusion that that was a bad idea and we should instead have a convention like the longstanding invariant for Lists, whereby an empty list is represented by NIL and nothing else. To do this, we need to fix bms_intersect, bms_difference, and a couple of other functions to check for having produced an empty result; but then we can replace bms_is_empty(a) by a simple "a == NULL" test. This is very likely a (marginal) win performance-wise, because we call bms_is_empty many more times than those other functions put together. However, the real reason to do it is that we have various places that have hand-implemented a rule about "this Bitmapset variable must be exactly NULL if empty", so that they can use checks-for-null in place of bms_is_empty calls in particularly hot code paths. That is a really fragile, mistake-prone way to do things, and I'm surprised that we've seldom been bitten by it. It's not well documented at all which variables have this property, so you can't readily tell which code might be violating those conventions. By making the convention universal, we can eliminate a subtle source of bugs. Patch by me; thanks to Nathan Bossart and Richard Guo for review. Discussion: https://postgr.es/m/1159933.1677621588@sss.pgh.pa.us
1 parent 141225b commit 00b4146

File tree

2 files changed

+57
-23
lines changed

2 files changed

+57
-23
lines changed

src/backend/nodes/bitmapset.c

+52-18
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,8 @@
55
*
66
* A bitmap set can represent any set of nonnegative integers, although
77
* it is mainly intended for sets where the maximum value is not large,
8-
* say at most a few hundred. By convention, a NULL pointer is always
9-
* accepted by all operations to represent the empty set. (But beware
10-
* that this is not the only representation of the empty set. Use
11-
* bms_is_empty() in preference to testing for NULL.)
8+
* say at most a few hundred. By convention, we always represent the
9+
* empty set by a NULL pointer.
1210
*
1311
*
1412
* Copyright (c) 2003-2023, PostgreSQL Global Development Group
@@ -66,6 +64,8 @@
6664
#error "invalid BITS_PER_BITMAPWORD"
6765
#endif
6866

67+
static bool bms_is_empty_internal(const Bitmapset *a);
68+
6969

7070
/*
7171
* bms_copy - make a palloc'd copy of a bitmapset
@@ -104,10 +104,10 @@ bms_equal(const Bitmapset *a, const Bitmapset *b)
104104
{
105105
if (b == NULL)
106106
return true;
107-
return bms_is_empty(b);
107+
return false;
108108
}
109109
else if (b == NULL)
110-
return bms_is_empty(a);
110+
return false;
111111
/* Identify shorter and longer input */
112112
if (a->nwords <= b->nwords)
113113
{
@@ -151,9 +151,9 @@ bms_compare(const Bitmapset *a, const Bitmapset *b)
151151

152152
/* Handle cases where either input is NULL */
153153
if (a == NULL)
154-
return bms_is_empty(b) ? 0 : -1;
154+
return (b == NULL) ? 0 : -1;
155155
else if (b == NULL)
156-
return bms_is_empty(a) ? 0 : +1;
156+
return +1;
157157
/* Handle cases where one input is longer than the other */
158158
shortlen = Min(a->nwords, b->nwords);
159159
for (i = shortlen; i < a->nwords; i++)
@@ -282,6 +282,12 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b)
282282
resultlen = result->nwords;
283283
for (i = 0; i < resultlen; i++)
284284
result->words[i] &= other->words[i];
285+
/* If we computed an empty result, we must return NULL */
286+
if (bms_is_empty_internal(result))
287+
{
288+
pfree(result);
289+
return NULL;
290+
}
285291
return result;
286292
}
287293

@@ -300,12 +306,22 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
300306
return NULL;
301307
if (b == NULL)
302308
return bms_copy(a);
309+
310+
/*
311+
* In Postgres' usage, an empty result is a very common case, so it's
312+
* worth optimizing for that by testing bms_nonempty_difference(). This
313+
* saves us a palloc/pfree cycle compared to checking after-the-fact.
314+
*/
315+
if (!bms_nonempty_difference(a, b))
316+
return NULL;
317+
303318
/* Copy the left input */
304319
result = bms_copy(a);
305320
/* And remove b's bits from result */
306321
shortlen = Min(a->nwords, b->nwords);
307322
for (i = 0; i < shortlen; i++)
308323
result->words[i] &= ~b->words[i];
324+
/* Need not check for empty result, since we handled that case above */
309325
return result;
310326
}
311327

@@ -323,7 +339,7 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b)
323339
if (a == NULL)
324340
return true; /* empty set is a subset of anything */
325341
if (b == NULL)
326-
return bms_is_empty(a);
342+
return false;
327343
/* Check common words */
328344
shortlen = Min(a->nwords, b->nwords);
329345
for (i = 0; i < shortlen; i++)
@@ -362,10 +378,10 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
362378
{
363379
if (b == NULL)
364380
return BMS_EQUAL;
365-
return bms_is_empty(b) ? BMS_EQUAL : BMS_SUBSET1;
381+
return BMS_SUBSET1;
366382
}
367383
if (b == NULL)
368-
return bms_is_empty(a) ? BMS_EQUAL : BMS_SUBSET2;
384+
return BMS_SUBSET2;
369385
/* Check common words */
370386
result = BMS_EQUAL; /* status so far */
371387
shortlen = Min(a->nwords, b->nwords);
@@ -554,7 +570,7 @@ bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
554570
if (a == NULL)
555571
return false;
556572
if (b == NULL)
557-
return !bms_is_empty(a);
573+
return true;
558574
/* Check words in common */
559575
shortlen = Min(a->nwords, b->nwords);
560576
for (i = 0; i < shortlen; i++)
@@ -696,18 +712,18 @@ bms_membership(const Bitmapset *a)
696712
}
697713

698714
/*
699-
* bms_is_empty - is a set empty?
715+
* bms_is_empty_internal - is a set empty?
700716
*
701-
* This is even faster than bms_membership().
717+
* This is now used only locally, to detect cases where a function has
718+
* computed an empty set that we must now get rid of. Hence, we can
719+
* assume the input isn't NULL.
702720
*/
703-
bool
704-
bms_is_empty(const Bitmapset *a)
721+
static bool
722+
bms_is_empty_internal(const Bitmapset *a)
705723
{
706724
int nwords;
707725
int wordnum;
708726

709-
if (a == NULL)
710-
return true;
711727
nwords = a->nwords;
712728
for (wordnum = 0; wordnum < nwords; wordnum++)
713729
{
@@ -786,6 +802,12 @@ bms_del_member(Bitmapset *a, int x)
786802
bitnum = BITNUM(x);
787803
if (wordnum < a->nwords)
788804
a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
805+
/* If we computed an empty result, we must return NULL */
806+
if (bms_is_empty_internal(a))
807+
{
808+
pfree(a);
809+
return NULL;
810+
}
789811
return a;
790812
}
791813

@@ -922,6 +944,12 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
922944
a->words[i] &= b->words[i];
923945
for (; i < a->nwords; i++)
924946
a->words[i] = 0;
947+
/* If we computed an empty result, we must return NULL */
948+
if (bms_is_empty_internal(a))
949+
{
950+
pfree(a);
951+
return NULL;
952+
}
925953
return a;
926954
}
927955

@@ -943,6 +971,12 @@ bms_del_members(Bitmapset *a, const Bitmapset *b)
943971
shortlen = Min(a->nwords, b->nwords);
944972
for (i = 0; i < shortlen; i++)
945973
a->words[i] &= ~b->words[i];
974+
/* If we computed an empty result, we must return NULL */
975+
if (bms_is_empty_internal(a))
976+
{
977+
pfree(a);
978+
return NULL;
979+
}
946980
return a;
947981
}
948982

src/include/nodes/bitmapset.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,8 @@
55
*
66
* A bitmap set can represent any set of nonnegative integers, although
77
* it is mainly intended for sets where the maximum value is not large,
8-
* say at most a few hundred. By convention, a NULL pointer is always
9-
* accepted by all operations to represent the empty set. (But beware
10-
* that this is not the only representation of the empty set. Use
11-
* bms_is_empty() in preference to testing for NULL.)
8+
* say at most a few hundred. By convention, we always represent the
9+
* empty set by a NULL pointer.
1210
*
1311
*
1412
* Copyright (c) 2003-2023, PostgreSQL Global Development Group
@@ -102,7 +100,9 @@ extern int bms_num_members(const Bitmapset *a);
102100

103101
/* optimized tests when we don't need to know exact membership count: */
104102
extern BMS_Membership bms_membership(const Bitmapset *a);
105-
extern bool bms_is_empty(const Bitmapset *a);
103+
104+
/* NULL is now the only allowed representation of an empty bitmapset */
105+
#define bms_is_empty(a) ((a) == NULL)
106106

107107
/* these routines recycle (modify or free) their non-const inputs: */
108108

0 commit comments

Comments
 (0)