Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2f54166

Browse files
Avoid amcheck inline compression false positives.
The previous tacit assumption that index_form_tuple() hides differences in the TOAST state of its input datums was wrong. Normalize input varlena datums by decompressing compressed values, and forming a new index tuple for fingerprinting using uncompressed inputs. The final normalized representation may actually be compressed once again within index_form_tuple(), though that shouldn't matter. When the original tuple is found to have no datums that are compressed inline, fingerprint the original tuple directly. Normalization avoids false positive reports of corruption in certain cases. For example, the executor can apply toasting with some inline compression to an entire heap tuple because its input has a single external TOAST pointer. Varlena datums for other attributes that are not particularly good candidates for inline compression can be compressed in the heap tuple in passing, without the representation of the same values in index tuples ever receiving concomitant inline compression. Add a test case to recreate the issue in a simpler though less realistic way: by exploiting differences in pg_attribute.attstorage between heap and index relations. This bug was discovered by me during testing of an upcoming set of nbtree enhancements. It was also independently reported by Andreas Kunert, as bug #15597. His test case was rather more realistic than the one I ended up using. Bug: #15597 Discussion: https://postgr.es/m/CAH2-WznrVd9ie+TTJ45nDT+v2nUt6YJwQrT9SebCdQKtAvfPZw@mail.gmail.com Discussion: https://postgr.es/m/15597-294e5d3e7f01c407@postgresql.org Backpatch: 11-, where heapallindexed verification was introduced.
1 parent 45ae203 commit 2f54166

File tree

3 files changed

+162
-23
lines changed

3 files changed

+162
-23
lines changed

contrib/amcheck/expected/check_btree.out

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,30 @@ SELECT bt_index_parent_check('delete_test_table_pkey', true);
140140

141141
(1 row)
142142

143+
--
144+
-- BUG #15597: must not assume consistent input toasting state when forming
145+
-- tuple. Bloom filter must fingerprint normalized index tuple representation.
146+
--
147+
CREATE TABLE toast_bug(buggy text);
148+
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain;
149+
-- pg_attribute entry for toasty.buggy will have plain storage:
150+
CREATE INDEX toasty ON toast_bug(buggy);
151+
-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage:
152+
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
153+
-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
154+
INSERT INTO toast_bug SELECT repeat('a', 2200);
155+
-- Should not get false positive report of corruption:
156+
SELECT bt_index_check('toasty', true);
157+
bt_index_check
158+
----------------
159+
160+
(1 row)
161+
143162
-- cleanup
144163
DROP TABLE bttest_a;
145164
DROP TABLE bttest_b;
146165
DROP TABLE bttest_multi;
147166
DROP TABLE delete_test_table;
167+
DROP TABLE toast_bug;
148168
DROP OWNED BY bttest_role; -- permissions
149169
DROP ROLE bttest_role;

contrib/amcheck/sql/check_btree.sql

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,26 @@ DELETE FROM delete_test_table WHERE a > 10;
8888
VACUUM delete_test_table;
8989
SELECT bt_index_parent_check('delete_test_table_pkey', true);
9090

91+
--
92+
-- BUG #15597: must not assume consistent input toasting state when forming
93+
-- tuple. Bloom filter must fingerprint normalized index tuple representation.
94+
--
95+
CREATE TABLE toast_bug(buggy text);
96+
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain;
97+
-- pg_attribute entry for toasty.buggy will have plain storage:
98+
CREATE INDEX toasty ON toast_bug(buggy);
99+
-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage:
100+
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
101+
-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
102+
INSERT INTO toast_bug SELECT repeat('a', 2200);
103+
-- Should not get false positive report of corruption:
104+
SELECT bt_index_check('toasty', true);
105+
91106
-- cleanup
92107
DROP TABLE bttest_a;
93108
DROP TABLE bttest_b;
94109
DROP TABLE bttest_multi;
95110
DROP TABLE delete_test_table;
111+
DROP TABLE toast_bug;
96112
DROP OWNED BY bttest_role; -- permissions
97113
DROP ROLE bttest_role;

contrib/amcheck/verify_nbtree.c

Lines changed: 126 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state);
132132
static void bt_tuple_present_callback(Relation index, HeapTuple htup,
133133
Datum *values, bool *isnull,
134134
bool tupleIsAlive, void *checkstate);
135+
static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
136+
IndexTuple itup);
135137
static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
136138
OffsetNumber offset);
137139
static inline bool invariant_leq_offset(BtreeCheckState *state,
@@ -907,7 +909,16 @@ bt_target_page_check(BtreeCheckState *state)
907909

908910
/* Fingerprint leaf page tuples (those that point to the heap) */
909911
if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid))
910-
bloom_add_element(state->filter, (unsigned char *) itup, tupsize);
912+
{
913+
IndexTuple norm;
914+
915+
norm = bt_normalize_tuple(state, itup);
916+
bloom_add_element(state->filter, (unsigned char *) norm,
917+
IndexTupleSize(norm));
918+
/* Be tidy */
919+
if (norm != itup)
920+
pfree(norm);
921+
}
911922

912923
/*
913924
* * High key check *
@@ -1671,35 +1682,18 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
16711682
bool *isnull, bool tupleIsAlive, void *checkstate)
16721683
{
16731684
BtreeCheckState *state = (BtreeCheckState *) checkstate;
1674-
IndexTuple itup;
1685+
IndexTuple itup, norm;
16751686

16761687
Assert(state->heapallindexed);
16771688

1678-
/*
1679-
* Generate an index tuple for fingerprinting.
1680-
*
1681-
* Index tuple formation is assumed to be deterministic, and IndexTuples
1682-
* are assumed immutable. While the LP_DEAD bit is mutable in leaf pages,
1683-
* that's ItemId metadata, which was not fingerprinted. (There will often
1684-
* be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter,
1685-
* but we only try to detect the absence of needed tuples, so that's
1686-
* okay.)
1687-
*
1688-
* Note that we rely on deterministic index_form_tuple() TOAST
1689-
* compression. If index_form_tuple() was ever enhanced to compress datums
1690-
* out-of-line, or otherwise varied when or how compression was applied,
1691-
* our assumption would break, leading to false positive reports of
1692-
* corruption. It's also possible that non-pivot tuples could in the
1693-
* future have alternative equivalent representations (e.g. by using the
1694-
* INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted
1695-
* values as part of fingerprinting.
1696-
*/
1689+
/* Generate a normalized index tuple for fingerprinting */
16971690
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
16981691
itup->t_tid = htup->t_self;
1692+
norm = bt_normalize_tuple(state, itup);
16991693

17001694
/* Probe Bloom filter -- tuple should be present */
1701-
if (bloom_lacks_element(state->filter, (unsigned char *) itup,
1702-
IndexTupleSize(itup)))
1695+
if (bloom_lacks_element(state->filter, (unsigned char *) norm,
1696+
IndexTupleSize(norm)))
17031697
ereport(ERROR,
17041698
(errcode(ERRCODE_DATA_CORRUPTED),
17051699
errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
@@ -1713,6 +1707,115 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
17131707

17141708
state->heaptuplespresent++;
17151709
pfree(itup);
1710+
/* Cannot leak memory here */
1711+
if (norm != itup)
1712+
pfree(norm);
1713+
}
1714+
1715+
/*
1716+
* Normalize an index tuple for fingerprinting.
1717+
*
1718+
* In general, index tuple formation is assumed to be deterministic by
1719+
* heapallindexed verification, and IndexTuples are assumed immutable. While
1720+
* the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is
1721+
* not fingerprinted. Normalization is required to compensate for corner
1722+
* cases where the determinism assumption doesn't quite work.
1723+
*
1724+
* There is currently one such case: index_form_tuple() does not try to hide
1725+
* the source TOAST state of input datums. The executor applies TOAST
1726+
* compression for heap tuples based on different criteria to the compression
1727+
* applied within btinsert()'s call to index_form_tuple(): it sometimes
1728+
* compresses more aggressively, resulting in compressed heap tuple datums but
1729+
* uncompressed corresponding index tuple datums. A subsequent heapallindexed
1730+
* verification will get a logically equivalent though bitwise unequal tuple
1731+
* from index_form_tuple(). False positive heapallindexed corruption reports
1732+
* could occur without normalizing away the inconsistency.
1733+
*
1734+
* Returned tuple is often caller's own original tuple. Otherwise, it is a
1735+
* new representation of caller's original index tuple, palloc()'d in caller's
1736+
* memory context.
1737+
*
1738+
* Note: This routine is not concerned with distinctions about the
1739+
* representation of tuples beyond those that might break heapallindexed
1740+
* verification. In particular, it won't try to normalize opclass-equal
1741+
* datums with potentially distinct representations (e.g., btree/numeric_ops
1742+
* index datums will not get their display scale normalized-away here).
1743+
* Normalization may need to be expanded to handle more cases in the future,
1744+
* though. For example, it's possible that non-pivot tuples could in the
1745+
* future have alternative logically equivalent representations due to using
1746+
* the INDEX_ALT_TID_MASK bit to implement intelligent deduplication.
1747+
*/
1748+
static IndexTuple
1749+
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
1750+
{
1751+
TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
1752+
Datum normalized[INDEX_MAX_KEYS];
1753+
bool isnull[INDEX_MAX_KEYS];
1754+
bool toast_free[INDEX_MAX_KEYS];
1755+
bool formnewtup = false;
1756+
IndexTuple reformed;
1757+
int i;
1758+
1759+
/* Easy case: It's immediately clear that tuple has no varlena datums */
1760+
if (!IndexTupleHasVarwidths(itup))
1761+
return itup;
1762+
1763+
for (i = 0; i < tupleDescriptor->natts; i++)
1764+
{
1765+
Form_pg_attribute att;
1766+
1767+
att = TupleDescAttr(tupleDescriptor, i);
1768+
1769+
/* Assume untoasted/already normalized datum initially */
1770+
toast_free[i] = false;
1771+
normalized[i] = index_getattr(itup, att->attnum,
1772+
tupleDescriptor,
1773+
&isnull[i]);
1774+
if (att->attbyval || att->attlen != -1 || isnull[i])
1775+
continue;
1776+
1777+
/*
1778+
* Callers always pass a tuple that could safely be inserted into the
1779+
* index without further processing, so an external varlena header
1780+
* should never be encountered here
1781+
*/
1782+
if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
1783+
ereport(ERROR,
1784+
(errcode(ERRCODE_INDEX_CORRUPTED),
1785+
errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
1786+
ItemPointerGetBlockNumber(&(itup->t_tid)),
1787+
ItemPointerGetOffsetNumber(&(itup->t_tid)),
1788+
RelationGetRelationName(state->rel))));
1789+
else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
1790+
{
1791+
formnewtup = true;
1792+
normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
1793+
toast_free[i] = true;
1794+
}
1795+
}
1796+
1797+
/* Easier case: Tuple has varlena datums, none of which are compressed */
1798+
if (!formnewtup)
1799+
return itup;
1800+
1801+
/*
1802+
* Hard case: Tuple had compressed varlena datums that necessitate
1803+
* creating normalized version of the tuple from uncompressed input datums
1804+
* (normalized input datums). This is rather naive, but shouldn't be
1805+
* necessary too often.
1806+
*
1807+
* Note that we rely on deterministic index_form_tuple() TOAST compression
1808+
* of normalized input.
1809+
*/
1810+
reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
1811+
reformed->t_tid = itup->t_tid;
1812+
1813+
/* Cannot leak memory here */
1814+
for (i = 0; i < tupleDescriptor->natts; i++)
1815+
if (toast_free[i])
1816+
pfree(DatumGetPointer(normalized[i]));
1817+
1818+
return reformed;
17161819
}
17171820

17181821
/*

0 commit comments

Comments
 (0)