@@ -132,6 +132,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state);
132
132
static void bt_tuple_present_callback (Relation index , HeapTuple htup ,
133
133
Datum * values , bool * isnull ,
134
134
bool tupleIsAlive , void * checkstate );
135
+ static IndexTuple bt_normalize_tuple (BtreeCheckState * state ,
136
+ IndexTuple itup );
135
137
static inline bool offset_is_negative_infinity (BTPageOpaque opaque ,
136
138
OffsetNumber offset );
137
139
static inline bool invariant_leq_offset (BtreeCheckState * state ,
@@ -907,7 +909,16 @@ bt_target_page_check(BtreeCheckState *state)
907
909
908
910
/* Fingerprint leaf page tuples (those that point to the heap) */
909
911
if (state -> heapallindexed && P_ISLEAF (topaque ) && !ItemIdIsDead (itemid ))
910
- bloom_add_element (state -> filter , (unsigned char * ) itup , tupsize );
912
+ {
913
+ IndexTuple norm ;
914
+
915
+ norm = bt_normalize_tuple (state , itup );
916
+ bloom_add_element (state -> filter , (unsigned char * ) norm ,
917
+ IndexTupleSize (norm ));
918
+ /* Be tidy */
919
+ if (norm != itup )
920
+ pfree (norm );
921
+ }
911
922
912
923
/*
913
924
* * High key check *
@@ -1671,35 +1682,18 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
1671
1682
bool * isnull , bool tupleIsAlive , void * checkstate )
1672
1683
{
1673
1684
BtreeCheckState * state = (BtreeCheckState * ) checkstate ;
1674
- IndexTuple itup ;
1685
+ IndexTuple itup , norm ;
1675
1686
1676
1687
Assert (state -> heapallindexed );
1677
1688
1678
- /*
1679
- * Generate an index tuple for fingerprinting.
1680
- *
1681
- * Index tuple formation is assumed to be deterministic, and IndexTuples
1682
- * are assumed immutable. While the LP_DEAD bit is mutable in leaf pages,
1683
- * that's ItemId metadata, which was not fingerprinted. (There will often
1684
- * be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter,
1685
- * but we only try to detect the absence of needed tuples, so that's
1686
- * okay.)
1687
- *
1688
- * Note that we rely on deterministic index_form_tuple() TOAST
1689
- * compression. If index_form_tuple() was ever enhanced to compress datums
1690
- * out-of-line, or otherwise varied when or how compression was applied,
1691
- * our assumption would break, leading to false positive reports of
1692
- * corruption. It's also possible that non-pivot tuples could in the
1693
- * future have alternative equivalent representations (e.g. by using the
1694
- * INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted
1695
- * values as part of fingerprinting.
1696
- */
1689
+ /* Generate a normalized index tuple for fingerprinting */
1697
1690
itup = index_form_tuple (RelationGetDescr (index ), values , isnull );
1698
1691
itup -> t_tid = htup -> t_self ;
1692
+ norm = bt_normalize_tuple (state , itup );
1699
1693
1700
1694
/* Probe Bloom filter -- tuple should be present */
1701
- if (bloom_lacks_element (state -> filter , (unsigned char * ) itup ,
1702
- IndexTupleSize (itup )))
1695
+ if (bloom_lacks_element (state -> filter , (unsigned char * ) norm ,
1696
+ IndexTupleSize (norm )))
1703
1697
ereport (ERROR ,
1704
1698
(errcode (ERRCODE_DATA_CORRUPTED ),
1705
1699
errmsg ("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"" ,
@@ -1713,6 +1707,115 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
1713
1707
1714
1708
state -> heaptuplespresent ++ ;
1715
1709
pfree (itup );
1710
+ /* Cannot leak memory here */
1711
+ if (norm != itup )
1712
+ pfree (norm );
1713
+ }
1714
+
1715
+ /*
1716
+ * Normalize an index tuple for fingerprinting.
1717
+ *
1718
+ * In general, index tuple formation is assumed to be deterministic by
1719
+ * heapallindexed verification, and IndexTuples are assumed immutable. While
1720
+ * the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is
1721
+ * not fingerprinted. Normalization is required to compensate for corner
1722
+ * cases where the determinism assumption doesn't quite work.
1723
+ *
1724
+ * There is currently one such case: index_form_tuple() does not try to hide
1725
+ * the source TOAST state of input datums. The executor applies TOAST
1726
+ * compression for heap tuples based on different criteria to the compression
1727
+ * applied within btinsert()'s call to index_form_tuple(): it sometimes
1728
+ * compresses more aggressively, resulting in compressed heap tuple datums but
1729
+ * uncompressed corresponding index tuple datums. A subsequent heapallindexed
1730
+ * verification will get a logically equivalent though bitwise unequal tuple
1731
+ * from index_form_tuple(). False positive heapallindexed corruption reports
1732
+ * could occur without normalizing away the inconsistency.
1733
+ *
1734
+ * Returned tuple is often caller's own original tuple. Otherwise, it is a
1735
+ * new representation of caller's original index tuple, palloc()'d in caller's
1736
+ * memory context.
1737
+ *
1738
+ * Note: This routine is not concerned with distinctions about the
1739
+ * representation of tuples beyond those that might break heapallindexed
1740
+ * verification. In particular, it won't try to normalize opclass-equal
1741
+ * datums with potentially distinct representations (e.g., btree/numeric_ops
1742
+ * index datums will not get their display scale normalized-away here).
1743
+ * Normalization may need to be expanded to handle more cases in the future,
1744
+ * though. For example, it's possible that non-pivot tuples could in the
1745
+ * future have alternative logically equivalent representations due to using
1746
+ * the INDEX_ALT_TID_MASK bit to implement intelligent deduplication.
1747
+ */
1748
+ static IndexTuple
1749
+ bt_normalize_tuple (BtreeCheckState * state , IndexTuple itup )
1750
+ {
1751
+ TupleDesc tupleDescriptor = RelationGetDescr (state -> rel );
1752
+ Datum normalized [INDEX_MAX_KEYS ];
1753
+ bool isnull [INDEX_MAX_KEYS ];
1754
+ bool toast_free [INDEX_MAX_KEYS ];
1755
+ bool formnewtup = false;
1756
+ IndexTuple reformed ;
1757
+ int i ;
1758
+
1759
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
1760
+ if (!IndexTupleHasVarwidths (itup ))
1761
+ return itup ;
1762
+
1763
+ for (i = 0 ; i < tupleDescriptor -> natts ; i ++ )
1764
+ {
1765
+ Form_pg_attribute att ;
1766
+
1767
+ att = TupleDescAttr (tupleDescriptor , i );
1768
+
1769
+ /* Assume untoasted/already normalized datum initially */
1770
+ toast_free [i ] = false;
1771
+ normalized [i ] = index_getattr (itup , att -> attnum ,
1772
+ tupleDescriptor ,
1773
+ & isnull [i ]);
1774
+ if (att -> attbyval || att -> attlen != -1 || isnull [i ])
1775
+ continue ;
1776
+
1777
+ /*
1778
+ * Callers always pass a tuple that could safely be inserted into the
1779
+ * index without further processing, so an external varlena header
1780
+ * should never be encountered here
1781
+ */
1782
+ if (VARATT_IS_EXTERNAL (DatumGetPointer (normalized [i ])))
1783
+ ereport (ERROR ,
1784
+ (errcode (ERRCODE_INDEX_CORRUPTED ),
1785
+ errmsg ("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"" ,
1786
+ ItemPointerGetBlockNumber (& (itup -> t_tid )),
1787
+ ItemPointerGetOffsetNumber (& (itup -> t_tid )),
1788
+ RelationGetRelationName (state -> rel ))));
1789
+ else if (VARATT_IS_COMPRESSED (DatumGetPointer (normalized [i ])))
1790
+ {
1791
+ formnewtup = true;
1792
+ normalized [i ] = PointerGetDatum (PG_DETOAST_DATUM (normalized [i ]));
1793
+ toast_free [i ] = true;
1794
+ }
1795
+ }
1796
+
1797
+ /* Easier case: Tuple has varlena datums, none of which are compressed */
1798
+ if (!formnewtup )
1799
+ return itup ;
1800
+
1801
+ /*
1802
+ * Hard case: Tuple had compressed varlena datums that necessitate
1803
+ * creating normalized version of the tuple from uncompressed input datums
1804
+ * (normalized input datums). This is rather naive, but shouldn't be
1805
+ * necessary too often.
1806
+ *
1807
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
1808
+ * of normalized input.
1809
+ */
1810
+ reformed = index_form_tuple (tupleDescriptor , normalized , isnull );
1811
+ reformed -> t_tid = itup -> t_tid ;
1812
+
1813
+ /* Cannot leak memory here */
1814
+ for (i = 0 ; i < tupleDescriptor -> natts ; i ++ )
1815
+ if (toast_free [i ])
1816
+ pfree (DatumGetPointer (normalized [i ]));
1817
+
1818
+ return reformed ;
1716
1819
}
1717
1820
1718
1821
/*
0 commit comments