@@ -133,6 +133,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state);
 static void bt_tuple_present_callback(Relation index, HeapTuple htup,
                           Datum *values, bool *isnull,
                           bool tupleIsAlive, void *checkstate);
+static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
+                          IndexTuple itup);
 static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
                           OffsetNumber offset);
 static inline bool invariant_leq_offset(BtreeCheckState *state,
@@ -908,7 +910,16 @@ bt_target_page_check(BtreeCheckState *state)
 
         /* Fingerprint leaf page tuples (those that point to the heap) */
         if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid))
-            bloom_add_element(state->filter, (unsigned char *) itup, tupsize);
+        {
+            IndexTuple  norm;
+
+            norm = bt_normalize_tuple(state, itup);
+            bloom_add_element(state->filter, (unsigned char *) norm,
+                              IndexTupleSize(norm));
+            /* Be tidy */
+            if (norm != itup)
+                pfree(norm);
+        }
 
         /*
          * * High key check *
@@ -1672,35 +1683,18 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
                           bool *isnull, bool tupleIsAlive, void *checkstate)
 {
     BtreeCheckState *state = (BtreeCheckState *) checkstate;
-    IndexTuple  itup;
+    IndexTuple  itup, norm;
 
     Assert(state->heapallindexed);
 
-    /*
-     * Generate an index tuple for fingerprinting.
-     *
-     * Index tuple formation is assumed to be deterministic, and IndexTuples
-     * are assumed immutable.  While the LP_DEAD bit is mutable in leaf pages,
-     * that's ItemId metadata, which was not fingerprinted.  (There will often
-     * be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter,
-     * but we only try to detect the absence of needed tuples, so that's
-     * okay.)
-     *
-     * Note that we rely on deterministic index_form_tuple() TOAST
-     * compression.  If index_form_tuple() was ever enhanced to compress datums
-     * out-of-line, or otherwise varied when or how compression was applied,
-     * our assumption would break, leading to false positive reports of
-     * corruption.  It's also possible that non-pivot tuples could in the
-     * future have alternative equivalent representations (e.g. by using the
-     * INDEX_ALT_TID_MASK bit).  For now, we don't decompress/normalize toasted
-     * values as part of fingerprinting.
-     */
+    /* Generate a normalized index tuple for fingerprinting */
     itup = index_form_tuple(RelationGetDescr(index), values, isnull);
     itup->t_tid = htup->t_self;
+    norm = bt_normalize_tuple(state, itup);
 
     /* Probe Bloom filter -- tuple should be present */
-    if (bloom_lacks_element(state->filter, (unsigned char *) itup,
-                            IndexTupleSize(itup)))
+    if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+                            IndexTupleSize(norm)))
         ereport(ERROR,
                 (errcode(ERRCODE_DATA_CORRUPTED),
                  errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
@@ -1714,6 +1708,115 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
 
     state->heaptuplespresent++;
     pfree(itup);
+    /* Cannot leak memory here */
+    if (norm != itup)
+        pfree(norm);
+}
+
+/*
+ * Normalize an index tuple for fingerprinting.
+ *
+ * In general, index tuple formation is assumed to be deterministic by
+ * heapallindexed verification, and IndexTuples are assumed immutable.  While
+ * the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is
+ * not fingerprinted.  Normalization is required to compensate for corner
+ * cases where the determinism assumption doesn't quite work.
+ *
+ * There is currently one such case: index_form_tuple() does not try to hide
+ * the source TOAST state of input datums.  The executor applies TOAST
+ * compression for heap tuples based on different criteria to the compression
+ * applied within btinsert()'s call to index_form_tuple(): it sometimes
+ * compresses more aggressively, resulting in compressed heap tuple datums but
+ * uncompressed corresponding index tuple datums.  A subsequent heapallindexed
+ * verification will get a logically equivalent though bitwise unequal tuple
+ * from index_form_tuple().  False positive heapallindexed corruption reports
+ * could occur without normalizing away the inconsistency.
+ *
+ * Returned tuple is often caller's own original tuple.  Otherwise, it is a
+ * new representation of caller's original index tuple, palloc()'d in caller's
+ * memory context.
+ *
+ * Note: This routine is not concerned with distinctions about the
+ * representation of tuples beyond those that might break heapallindexed
+ * verification.  In particular, it won't try to normalize opclass-equal
+ * datums with potentially distinct representations (e.g., btree/numeric_ops
+ * index datums will not get their display scale normalized-away here).
+ * Normalization may need to be expanded to handle more cases in the future,
+ * though.  For example, it's possible that non-pivot tuples could in the
+ * future have alternative logically equivalent representations due to using
+ * the INDEX_ALT_TID_MASK bit to implement intelligent deduplication.
+ */
+static IndexTuple
+bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
+{
+    TupleDesc   tupleDescriptor = RelationGetDescr(state->rel);
+    Datum       normalized[INDEX_MAX_KEYS];
+    bool        isnull[INDEX_MAX_KEYS];
+    bool        toast_free[INDEX_MAX_KEYS];
+    bool        formnewtup = false;
+    IndexTuple  reformed;
+    int         i;
+
+    /* Easy case: It's immediately clear that tuple has no varlena datums */
+    if (!IndexTupleHasVarwidths(itup))
+        return itup;
+
+    for (i = 0; i < tupleDescriptor->natts; i++)
+    {
+        Form_pg_attribute att;
+
+        att = TupleDescAttr(tupleDescriptor, i);
+
+        /* Assume untoasted/already normalized datum initially */
+        toast_free[i] = false;
+        normalized[i] = index_getattr(itup, att->attnum,
+                                      tupleDescriptor,
+                                      &isnull[i]);
+        if (att->attbyval || att->attlen != -1 || isnull[i])
+            continue;
+
+        /*
+         * Callers always pass a tuple that could safely be inserted into the
+         * index without further processing, so an external varlena header
+         * should never be encountered here
+         */
+        if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+            ereport(ERROR,
+                    (errcode(ERRCODE_INDEX_CORRUPTED),
+                     errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+                            ItemPointerGetBlockNumber(&(itup->t_tid)),
+                            ItemPointerGetOffsetNumber(&(itup->t_tid)),
+                            RelationGetRelationName(state->rel))));
+        else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+        {
+            formnewtup = true;
+            normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+            toast_free[i] = true;
+        }
+    }
+
+    /* Easier case: Tuple has varlena datums, none of which are compressed */
+    if (!formnewtup)
+        return itup;
+
+    /*
+     * Hard case: Tuple had compressed varlena datums that necessitate
+     * creating normalized version of the tuple from uncompressed input datums
+     * (normalized input datums).  This is rather naive, but shouldn't be
+     * necessary too often.
+     *
+     * Note that we rely on deterministic index_form_tuple() TOAST compression
+     * of normalized input.
+     */
+    reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+    reformed->t_tid = itup->t_tid;
+
+    /* Cannot leak memory here */
+    for (i = 0; i < tupleDescriptor->natts; i++)
+        if (toast_free[i])
+            pfree(DatumGetPointer(normalized[i]));
+
+    return reformed;
 }
 
 /*
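
The change is easiest to read as "normalize, then fingerprint": bt_target_page_check() fingerprints each leaf tuple only after bt_normalize_tuple() has decompressed any compressed varlena datums, and bt_tuple_present_callback() applies the same normalization to the tuple it re-forms from the heap before probing the Bloom filter, so logically equal tuples hash identically even when the heap and index applied TOAST compression differently. The sketch below is a minimal standalone illustration of that idea, not PostgreSQL code: the ToyDatum struct, its run-length "compression", the FNV-1a fingerprint, and normalize() are invented stand-ins for amcheck's IndexTuple, TOAST compression, Bloom filter hashing, and bt_normalize_tuple().

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /*
     * Toy stand-in for a varlena datum: the same logical byte string can be
     * stored verbatim or "compressed" as a (byte, repeat-count) pair.
     */
    typedef struct ToyDatum
    {
        int             compressed; /* 1 if data[] holds (byte, repeat-count) */
        unsigned char   data[64];
        size_t          len;
    } ToyDatum;

    /* FNV-1a over the stored bytes, standing in for Bloom filter hashing */
    static uint64_t
    fingerprint(const unsigned char *buf, size_t len)
    {
        uint64_t    h = 1469598103934665603ULL;
        size_t      i;

        for (i = 0; i < len; i++)
        {
            h ^= buf[i];
            h *= 1099511628211ULL;
        }
        return h;
    }

    /* Stand-in for bt_normalize_tuple(): expand the toy compression */
    static ToyDatum
    normalize(ToyDatum d)
    {
        ToyDatum    out;

        if (!d.compressed)
            return d;               /* already in canonical form */

        memset(&out, 0, sizeof(out));
        memset(out.data, d.data[0], d.data[1]);
        out.len = d.data[1];
        return out;
    }

    int
    main(void)
    {
        /* The same logical value "AAAAAAAA", stored two different ways */
        ToyDatum    plain = {0, "AAAAAAAA", 8};
        ToyDatum    packed = {1, {'A', 8}, 2};
        ToyDatum    n1 = normalize(plain);
        ToyDatum    n2 = normalize(packed);

        /* Raw bytes differ, so a raw-bytes Bloom probe would falsely "miss" */
        printf("raw fingerprints equal:        %d\n",
               fingerprint(plain.data, plain.len) ==
               fingerprint(packed.data, packed.len));

        /* After normalizing both sides, equal values fingerprint identically */
        printf("normalized fingerprints equal: %d\n",
               fingerprint(n1.data, n1.len) == fingerprint(n2.data, n2.len));
        return 0;
    }

In the patch itself the normalization step only ever calls PG_DETOAST_DATUM() on compressed (never external) varlena datums, and the fingerprinting is exercised through amcheck's bt_index_check(index, heapallindexed => true).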