postgrespro
diff --git a/‎src/backend/utils/adt/varbit.c
Lines changed: 77 additions & 88 deletions b/‎src/backend/utils/adt/varbit.c
Lines changed: 77 additions & 88 deletions
diff --git a/‎src/include/utils/varbit.h
Lines changed: 5 additions & 0 deletions b/‎src/include/utils/varbit.h
Lines changed: 5 additions & 0 deletions
@@ -3,6 +3,21 @@
  * varbit.c
  *	  Functions for the SQL datatypes BIT() and BIT VARYING().
  *
+ * The data structure contains the following elements:
+ *   header  -- length of the whole data structure (incl header)
+ *              in bytes (as with all varying length datatypes)
+ *   data section -- private data section for the bits data structures
+ *     bitlength -- length of the bit string in bits
+ *     bitdata   -- bit string, most significant byte first
+ *
+ * The length of the bitdata vector should always be exactly as many
+ * bytes as are needed for the given bitlength.  If the bitlength is
+ * not a multiple of 8, the extra low-order padding bits of the last
+ * byte must be zeroes.
+ *
+ * attypmod is defined as the length of the bit string in bits, or for
+ * varying bits the maximum length.
+ *
  * Code originally contributed by Adriaan Joubert.
  *
  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
@@ -27,6 +42,40 @@
 
 #define HEXDIG(z)	 ((z)<10 ? ((z)+'0') : ((z)-10+'A'))
 
+/* Mask off any bits that should be zero in the last byte of a bitstring */
+#define VARBIT_PAD(vb) \
+	do { \
+		int32	pad_ = VARBITPAD(vb); \
+		Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+		if (pad_ > 0) \
+			*(VARBITS(vb) + VARBITBYTES(vb) - 1) &= BITMASK << pad_; \
+	} while (0)
+
+/*
+ * Many functions work byte-by-byte, so they have a pointer handy to the
+ * last-plus-one byte, which saves a cycle or two.
+ */
+#define VARBIT_PAD_LAST(vb, ptr) \
+	do { \
+		int32	pad_ = VARBITPAD(vb); \
+		Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+		if (pad_ > 0) \
+			*((ptr) - 1) &= BITMASK << pad_; \
+	} while (0)
+
+/* Assert proper padding of a bitstring */
+#ifdef USE_ASSERT_CHECKING
+#define VARBIT_CORRECTLY_PADDED(vb) \
+	do { \
+		int32	pad_ = VARBITPAD(vb); \
+		Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+		Assert(pad_ == 0 || \
+			   (*(VARBITS(vb) + VARBITBYTES(vb) - 1) & ~(BITMASK << pad_)) == 0); \
+	} while (0)
+#else
+#define VARBIT_CORRECTLY_PADDED(vb) ((void) 0)
+#endif
+
 static VarBit *bit_catenate(VarBit *arg1, VarBit *arg2);
 static VarBit *bitsubstring(VarBit *arg, int32 s, int32 l,
 							bool length_not_specified);
@@ -87,24 +136,6 @@ anybit_typmodout(int32 typmod)
 }
 
 
-/*----------
- *	attypmod -- contains the length of the bit string in bits, or for
- *			   varying bits the maximum length.
- *
- *	The data structure contains the following elements:
- *	  header  -- length of the whole data structure (incl header)
- *				 in bytes. (as with all varying length datatypes)
- *	  data section -- private data section for the bits data structures
- *		bitlength -- length of the bit string in bits
- *		bitdata   -- bit string, most significant byte first
- *
- *	The length of the bitdata vector should always be exactly as many
- *	bytes as are needed for the given bitlength.  If the bitlength is
- *	not a multiple of 8, the extra low-order padding bits of the last
- *	byte must be zeroes.
- *----------
- */
-
 /*
  * bit_in -
  *	  converts a char string to the internal representation of a bitstring.
@@ -264,6 +295,9 @@ bit_out(PG_FUNCTION_ARGS)
 				len,
 				bitlen;
 
+	/* Assertion to help catch any bit functions that don't pad correctly */
+	VARBIT_CORRECTLY_PADDED(s);
+
 	bitlen = VARBITLEN(s);
 	len = (bitlen + 3) / 4;
 	result = (char *) palloc(len + 2);
@@ -304,8 +338,6 @@ bit_recv(PG_FUNCTION_ARGS)
 	VarBit	   *result;
 	int			len,
 				bitlen;
-	int			ipad;
-	bits8		mask;
 
 	bitlen = pq_getmsgint(buf, sizeof(int32));
 	if (bitlen < 0 || bitlen > VARBITMAXLEN)
@@ -330,13 +362,8 @@ bit_recv(PG_FUNCTION_ARGS)
 
 	pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result));
 
-	/* Make sure last byte is zero-padded if needed */
-	ipad = VARBITPAD(result);
-	if (ipad > 0)
-	{
-		mask = BITMASK << ipad;
-		*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
-	}
+	/* Make sure last byte is correctly zero-padded */
+	VARBIT_PAD(result);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -367,8 +394,6 @@ bit(PG_FUNCTION_ARGS)
 	bool		isExplicit = PG_GETARG_BOOL(2);
 	VarBit	   *result;
 	int			rlen;
-	int			ipad;
-	bits8		mask;
 
 	/* No work if typmod is invalid or supplied data matches it already */
 	if (len <= 0 || len > VARBITMAXLEN || len == VARBITLEN(arg))
@@ -394,12 +419,7 @@ bit(PG_FUNCTION_ARGS)
 	 * if source data was shorter than target length (we assume the last byte
 	 * of the source data was itself correctly zero-padded).
 	 */
-	ipad = VARBITPAD(result);
-	if (ipad > 0)
-	{
-		mask = BITMASK << ipad;
-		*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
-	}
+	VARBIT_PAD(result);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -574,6 +594,9 @@ varbit_out(PG_FUNCTION_ARGS)
 				k,
 				len;
 
+	/* Assertion to help catch any bit functions that don't pad correctly */
+	VARBIT_CORRECTLY_PADDED(s);
+
 	len = VARBITLEN(s);
 	result = (char *) palloc(len + 1);
 	sp = VARBITS(s);
@@ -620,8 +643,6 @@ varbit_recv(PG_FUNCTION_ARGS)
 	VarBit	   *result;
 	int			len,
 				bitlen;
-	int			ipad;
-	bits8		mask;
 
 	bitlen = pq_getmsgint(buf, sizeof(int32));
 	if (bitlen < 0 || bitlen > VARBITMAXLEN)
@@ -646,13 +667,8 @@ varbit_recv(PG_FUNCTION_ARGS)
 
 	pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result));
 
-	/* Make sure last byte is zero-padded if needed */
-	ipad = VARBITPAD(result);
-	if (ipad > 0)
-	{
-		mask = BITMASK << ipad;
-		*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
-	}
+	/* Make sure last byte is correctly zero-padded */
+	VARBIT_PAD(result);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -729,8 +745,6 @@ varbit(PG_FUNCTION_ARGS)
 	bool		isExplicit = PG_GETARG_BOOL(2);
 	VarBit	   *result;
 	int			rlen;
-	int			ipad;
-	bits8		mask;
 
 	/* No work if typmod is invalid or supplied data matches it already */
 	if (len <= 0 || len >= VARBITLEN(arg))
@@ -749,13 +763,8 @@ varbit(PG_FUNCTION_ARGS)
 
 	memcpy(VARBITS(result), VARBITS(arg), VARBITBYTES(result));
 
-	/* Make sure last byte is zero-padded if needed */
-	ipad = VARBITPAD(result);
-	if (ipad > 0)
-	{
-		mask = BITMASK << ipad;
-		*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
-	}
+	/* Make sure last byte is correctly zero-padded */
+	VARBIT_PAD(result);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1013,6 +1022,8 @@ bit_catenate(VarBit *arg1, VarBit *arg2)
 		}
 	}
 
+	/* The pad bits should be already zero at this point */
+
 	return result;
 }
 
@@ -1046,14 +1057,12 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified)
 	int			bitlen,
 				rbitlen,
 				len,
-				ipad = 0,
 				ishift,
 				i;
 	int			e,
 				s1,
 				e1;
-	bits8		mask,
-			   *r,
+	bits8	   *r,
 			   *ps;
 
 	bitlen = VARBITLEN(arg);
@@ -1118,13 +1127,9 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified)
 				r++;
 			}
 		}
-		/* Do we need to pad at the end? */
-		ipad = VARBITPAD(result);
-		if (ipad > 0)
-		{
-			mask = BITMASK << ipad;
-			*(VARBITS(result) + len - 1) &= mask;
-		}
+
+		/* Make sure last byte is correctly zero-padded */
+		VARBIT_PAD(result);
 	}
 
 	return result;
@@ -1246,7 +1251,7 @@ bit_and(PG_FUNCTION_ARGS)
 	for (i = 0; i < VARBITBYTES(arg1); i++)
 		*r++ = *p1++ & *p2++;
 
-	/* Padding is not needed as & of 0 pad is 0 */
+	/* Padding is not needed as & of 0 pads is 0 */
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1268,7 +1273,6 @@ bit_or(PG_FUNCTION_ARGS)
 	bits8	   *p1,
 			   *p2,
 			   *r;
-	bits8		mask;
 
 	bitlen1 = VARBITLEN(arg1);
 	bitlen2 = VARBITLEN(arg2);
@@ -1287,13 +1291,7 @@ bit_or(PG_FUNCTION_ARGS)
 	for (i = 0; i < VARBITBYTES(arg1); i++)
 		*r++ = *p1++ | *p2++;
 
-	/* Pad the result */
-	mask = BITMASK << VARBITPAD(result);
-	if (mask)
-	{
-		r--;
-		*r &= mask;
-	}
+	/* Padding is not needed as | of 0 pads is 0 */
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1315,7 +1313,6 @@ bitxor(PG_FUNCTION_ARGS)
 	bits8	   *p1,
 			   *p2,
 			   *r;
-	bits8		mask;
 
 	bitlen1 = VARBITLEN(arg1);
 	bitlen2 = VARBITLEN(arg2);
@@ -1335,13 +1332,7 @@ bitxor(PG_FUNCTION_ARGS)
 	for (i = 0; i < VARBITBYTES(arg1); i++)
 		*r++ = *p1++ ^ *p2++;
 
-	/* Pad the result */
-	mask = BITMASK << VARBITPAD(result);
-	if (mask)
-	{
-		r--;
-		*r &= mask;
-	}
+	/* Padding is not needed as ^ of 0 pads is 0 */
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1357,7 +1348,6 @@ bitnot(PG_FUNCTION_ARGS)
 	VarBit	   *result;
 	bits8	   *p,
 			   *r;
-	bits8		mask;
 
 	result = (VarBit *) palloc(VARSIZE(arg));
 	SET_VARSIZE(result, VARSIZE(arg));
@@ -1368,13 +1358,8 @@ bitnot(PG_FUNCTION_ARGS)
 	for (; p < VARBITEND(arg); p++)
 		*r++ = ~*p;
 
-	/* Pad the result */
-	mask = BITMASK << VARBITPAD(result);
-	if (mask)
-	{
-		r--;
-		*r &= mask;
-	}
+	/* Must zero-pad the result, because extra bits are surely 1's here */
+	VARBIT_PAD_LAST(result, r);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1441,6 +1426,8 @@ bitshiftleft(PG_FUNCTION_ARGS)
 			*r = 0;
 	}
 
+	/* The pad bits should be already zero at this point */
+
 	PG_RETURN_VARBIT_P(result);
 }
 
@@ -1507,6 +1494,8 @@ bitshiftright(PG_FUNCTION_ARGS)
 			if ((++r) < VARBITEND(result))
 				*r = (*p << (BITS_PER_BYTE - ishift)) & BITMASK;
 		}
+		/* We may have shifted 1's into the pad bits, so fix that */
+		VARBIT_PAD_LAST(result, r);
 	}
 
 	PG_RETURN_VARBIT_P(result);
 
@@ -21,6 +21,11 @@
 
 /*
  * Modeled on struct varlena from postgres.h, but data type is bits8.
+ *
+ * Caution: if bit_len is not a multiple of BITS_PER_BYTE, the low-order
+ * bits of the last byte of bit_dat[] are unused and MUST be zeroes.
+ * (This allows bit_cmp() to not bother masking the last byte.)
+ * Also, there should not be any excess bytes counted in the header length.
  */
 typedef struct
 {