Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit eba7753

Browse files
Avoid amcheck inline compression false positives.
The previous tacit assumption that index_form_tuple() hides differences in the TOAST state of its input datums was wrong. Normalize input varlena datums by decompressing compressed values, and forming a new index tuple for fingerprinting using uncompressed inputs. The final normalized representation may actually be compressed once again within index_form_tuple(), though that shouldn't matter. When the original tuple is found to have no datums that are compressed inline, fingerprint the original tuple directly. Normalization avoids false positive reports of corruption in certain cases. For example, the executor can apply toasting with some inline compression to an entire heap tuple because its input has a single external TOAST pointer. Varlena datums for other attributes that are not particularly good candidates for inline compression can be compressed in the heap tuple in passing, without the representation of the same values in index tuples ever receiving concomitant inline compression. Add a test case to recreate the issue in a simpler though less realistic way: by exploiting differences in pg_attribute.attstorage between heap and index relations. This bug was discovered by me during testing of an upcoming set of nbtree enhancements. It was also independently reported by Andreas Kunert, as bug #15597. His test case was rather more realistic than the one I ended up using. Bug: #15597 Discussion: https://postgr.es/m/CAH2-WznrVd9ie+TTJ45nDT+v2nUt6YJwQrT9SebCdQKtAvfPZw@mail.gmail.com Discussion: https://postgr.es/m/15597-294e5d3e7f01c407@postgresql.org Backpatch: 11-, where heapallindexed verification was introduced.
1 parent 727921f commit eba7753

File tree

3 files changed

+162
-23
lines changed

3 files changed

+162
-23
lines changed

contrib/amcheck/expected/check_btree.out

+20
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,30 @@ SELECT bt_index_parent_check('delete_test_table_pkey', true);
140140

141141
(1 row)
142142

143+
--
144+
-- BUG #15597: must not assume consistent input toasting state when forming
145+
-- tuple. Bloom filter must fingerprint normalized index tuple representation.
146+
--
147+
CREATE TABLE toast_bug(buggy text);
148+
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain;
149+
-- pg_attribute entry for toasty.buggy will have plain storage:
150+
CREATE INDEX toasty ON toast_bug(buggy);
151+
-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage:
152+
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
153+
-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
154+
INSERT INTO toast_bug SELECT repeat('a', 2200);
155+
-- Should not get false positive report of corruption:
156+
SELECT bt_index_check('toasty', true);
157+
bt_index_check
158+
----------------
159+
160+
(1 row)
161+
143162
-- cleanup
144163
DROP TABLE bttest_a;
145164
DROP TABLE bttest_b;
146165
DROP TABLE bttest_multi;
147166
DROP TABLE delete_test_table;
167+
DROP TABLE toast_bug;
148168
DROP OWNED BY bttest_role; -- permissions
149169
DROP ROLE bttest_role;

contrib/amcheck/sql/check_btree.sql

+16
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,26 @@ DELETE FROM delete_test_table WHERE a > 10;
8888
VACUUM delete_test_table;
8989
SELECT bt_index_parent_check('delete_test_table_pkey', true);
9090

91+
--
92+
-- BUG #15597: must not assume consistent input toasting state when forming
93+
-- tuple. Bloom filter must fingerprint normalized index tuple representation.
94+
--
95+
CREATE TABLE toast_bug(buggy text);
96+
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain;
97+
-- pg_attribute entry for toasty.buggy will have plain storage:
98+
CREATE INDEX toasty ON toast_bug(buggy);
99+
-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage:
100+
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
101+
-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
102+
INSERT INTO toast_bug SELECT repeat('a', 2200);
103+
-- Should not get false positive report of corruption:
104+
SELECT bt_index_check('toasty', true);
105+
91106
-- cleanup
92107
DROP TABLE bttest_a;
93108
DROP TABLE bttest_b;
94109
DROP TABLE bttest_multi;
95110
DROP TABLE delete_test_table;
111+
DROP TABLE toast_bug;
96112
DROP OWNED BY bttest_role; -- permissions
97113
DROP ROLE bttest_role;

contrib/amcheck/verify_nbtree.c

+126-23
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state);
133133
static void bt_tuple_present_callback(Relation index, HeapTuple htup,
134134
Datum *values, bool *isnull,
135135
bool tupleIsAlive, void *checkstate);
136+
static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
137+
IndexTuple itup);
136138
static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
137139
OffsetNumber offset);
138140
static inline bool invariant_leq_offset(BtreeCheckState *state,
@@ -908,7 +910,16 @@ bt_target_page_check(BtreeCheckState *state)
908910

909911
/* Fingerprint leaf page tuples (those that point to the heap) */
910912
if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid))
911-
bloom_add_element(state->filter, (unsigned char *) itup, tupsize);
913+
{
914+
IndexTuple norm;
915+
916+
norm = bt_normalize_tuple(state, itup);
917+
bloom_add_element(state->filter, (unsigned char *) norm,
918+
IndexTupleSize(norm));
919+
/* Be tidy */
920+
if (norm != itup)
921+
pfree(norm);
922+
}
912923

913924
/*
914925
* * High key check *
@@ -1672,35 +1683,18 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
16721683
bool *isnull, bool tupleIsAlive, void *checkstate)
16731684
{
16741685
BtreeCheckState *state = (BtreeCheckState *) checkstate;
1675-
IndexTuple itup;
1686+
IndexTuple itup, norm;
16761687

16771688
Assert(state->heapallindexed);
16781689

1679-
/*
1680-
* Generate an index tuple for fingerprinting.
1681-
*
1682-
* Index tuple formation is assumed to be deterministic, and IndexTuples
1683-
* are assumed immutable. While the LP_DEAD bit is mutable in leaf pages,
1684-
* that's ItemId metadata, which was not fingerprinted. (There will often
1685-
* be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter,
1686-
* but we only try to detect the absence of needed tuples, so that's
1687-
* okay.)
1688-
*
1689-
* Note that we rely on deterministic index_form_tuple() TOAST
1690-
* compression. If index_form_tuple() was ever enhanced to compress datums
1691-
* out-of-line, or otherwise varied when or how compression was applied,
1692-
* our assumption would break, leading to false positive reports of
1693-
* corruption. It's also possible that non-pivot tuples could in the
1694-
* future have alternative equivalent representations (e.g. by using the
1695-
* INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted
1696-
* values as part of fingerprinting.
1697-
*/
1690+
/* Generate a normalized index tuple for fingerprinting */
16981691
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
16991692
itup->t_tid = htup->t_self;
1693+
norm = bt_normalize_tuple(state, itup);
17001694

17011695
/* Probe Bloom filter -- tuple should be present */
1702-
if (bloom_lacks_element(state->filter, (unsigned char *) itup,
1703-
IndexTupleSize(itup)))
1696+
if (bloom_lacks_element(state->filter, (unsigned char *) norm,
1697+
IndexTupleSize(norm)))
17041698
ereport(ERROR,
17051699
(errcode(ERRCODE_DATA_CORRUPTED),
17061700
errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
@@ -1714,6 +1708,115 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
17141708

17151709
state->heaptuplespresent++;
17161710
pfree(itup);
1711+
/* Cannot leak memory here */
1712+
if (norm != itup)
1713+
pfree(norm);
1714+
}
1715+
1716+
/*
1717+
* Normalize an index tuple for fingerprinting.
1718+
*
1719+
* In general, index tuple formation is assumed to be deterministic by
1720+
* heapallindexed verification, and IndexTuples are assumed immutable. While
1721+
* the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is
1722+
* not fingerprinted. Normalization is required to compensate for corner
1723+
* cases where the determinism assumption doesn't quite work.
1724+
*
1725+
* There is currently one such case: index_form_tuple() does not try to hide
1726+
* the source TOAST state of input datums. The executor applies TOAST
1727+
* compression for heap tuples based on different criteria to the compression
1728+
* applied within btinsert()'s call to index_form_tuple(): it sometimes
1729+
* compresses more aggressively, resulting in compressed heap tuple datums but
1730+
* uncompressed corresponding index tuple datums. A subsequent heapallindexed
1731+
* verification will get a logically equivalent though bitwise unequal tuple
1732+
* from index_form_tuple(). False positive heapallindexed corruption reports
1733+
* could occur without normalizing away the inconsistency.
1734+
*
1735+
* Returned tuple is often caller's own original tuple. Otherwise, it is a
1736+
* new representation of caller's original index tuple, palloc()'d in caller's
1737+
* memory context.
1738+
*
1739+
* Note: This routine is not concerned with distinctions about the
1740+
* representation of tuples beyond those that might break heapallindexed
1741+
* verification. In particular, it won't try to normalize opclass-equal
1742+
* datums with potentially distinct representations (e.g., btree/numeric_ops
1743+
* index datums will not get their display scale normalized-away here).
1744+
* Normalization may need to be expanded to handle more cases in the future,
1745+
* though. For example, it's possible that non-pivot tuples could in the
1746+
* future have alternative logically equivalent representations due to using
1747+
* the INDEX_ALT_TID_MASK bit to implement intelligent deduplication.
1748+
*/
1749+
static IndexTuple
1750+
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
1751+
{
1752+
TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
1753+
Datum normalized[INDEX_MAX_KEYS];
1754+
bool isnull[INDEX_MAX_KEYS];
1755+
bool toast_free[INDEX_MAX_KEYS];
1756+
bool formnewtup = false;
1757+
IndexTuple reformed;
1758+
int i;
1759+
1760+
/* Easy case: It's immediately clear that tuple has no varlena datums */
1761+
if (!IndexTupleHasVarwidths(itup))
1762+
return itup;
1763+
1764+
for (i = 0; i < tupleDescriptor->natts; i++)
1765+
{
1766+
Form_pg_attribute att;
1767+
1768+
att = TupleDescAttr(tupleDescriptor, i);
1769+
1770+
/* Assume untoasted/already normalized datum initially */
1771+
toast_free[i] = false;
1772+
normalized[i] = index_getattr(itup, att->attnum,
1773+
tupleDescriptor,
1774+
&isnull[i]);
1775+
if (att->attbyval || att->attlen != -1 || isnull[i])
1776+
continue;
1777+
1778+
/*
1779+
* Callers always pass a tuple that could safely be inserted into the
1780+
* index without further processing, so an external varlena header
1781+
* should never be encountered here
1782+
*/
1783+
if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
1784+
ereport(ERROR,
1785+
(errcode(ERRCODE_INDEX_CORRUPTED),
1786+
errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
1787+
ItemPointerGetBlockNumber(&(itup->t_tid)),
1788+
ItemPointerGetOffsetNumber(&(itup->t_tid)),
1789+
RelationGetRelationName(state->rel))));
1790+
else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
1791+
{
1792+
formnewtup = true;
1793+
normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
1794+
toast_free[i] = true;
1795+
}
1796+
}
1797+
1798+
/* Easier case: Tuple has varlena datums, none of which are compressed */
1799+
if (!formnewtup)
1800+
return itup;
1801+
1802+
/*
1803+
* Hard case: Tuple had compressed varlena datums that necessitate
1804+
* creating normalized version of the tuple from uncompressed input datums
1805+
* (normalized input datums). This is rather naive, but shouldn't be
1806+
* necessary too often.
1807+
*
1808+
* Note that we rely on deterministic index_form_tuple() TOAST compression
1809+
* of normalized input.
1810+
*/
1811+
reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
1812+
reformed->t_tid = itup->t_tid;
1813+
1814+
/* Cannot leak memory here */
1815+
for (i = 0; i < tupleDescriptor->natts; i++)
1816+
if (toast_free[i])
1817+
pfree(DatumGetPointer(normalized[i]));
1818+
1819+
return reformed;
17171820
}
17181821

17191822
/*

0 commit comments

Comments
 (0)