Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 7f52e0c

Browse files
committed
Tweak writetup_heap/readtup_heap to avoid storing the tuple identity
and transaction visibility fields of tuples being sorted. These fields are never of interest in a tuple being sorted (if they were actually selected, they would already have been pulled out into user columns beforehand). This saves about 24 bytes per row being sorted, which is a useful saving for all but the widest of sort rows. Per recent discussion.
1 parent: 672f0be; commit: 7f52e0c

File tree

2 files changed: +91 / -20 lines changed

src/backend/utils/sort/tuplesort.c

Lines changed: 85 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@
9191
* Portions Copyright (c) 1994, Regents of the University of California
9292
*
9393
* IDENTIFICATION
94-
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.65 2006/03/10 23:19:00 tgl Exp $
94+
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.66 2006/05/23 21:37:59 tgl Exp $
9595
*
9696
*-------------------------------------------------------------------------
9797
*/
@@ -2329,23 +2329,53 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
23292329
}
23302330

23312331
/*
2332-
* We don't bother to write the HeapTupleData part of the tuple.
2332+
* When writing HeapTuples to tape, we strip off all tuple identity and
2333+
* transaction visibility information, because those fields aren't really
2334+
* interesting for in-memory tuples (they may or may not be valid in the
2335+
* incoming tuples, depending on the plan that's feeding the sort). We
2336+
* only need to store t_natts, t_infomask, the nulls bitmap if any, and
2337+
* the user data.
2338+
*
2339+
* You might think that we could omit storing t_natts, but you'd be wrong:
2340+
* the incoming tuple might be a physical disk tuple with fewer columns
2341+
* than the table's current logical tupdesc.
23332342
*/
2343+
typedef struct TapeTupleHeader
2344+
{
2345+
unsigned int tuplen; /* required header of a tape item */
2346+
int16 natts; /* number of attributes */
2347+
uint16 infomask; /* various flag bits */
2348+
/* nulls bitmap follows if HEAP_HASNULL, then actual tuple data */
2349+
} TapeTupleHeader;
23342350

23352351
static void
23362352
writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup)
23372353
{
23382354
HeapTuple tuple = (HeapTuple) stup->tuple;
2339-
unsigned int tuplen;
2340-
2341-
tuplen = tuple->t_len + sizeof(tuplen);
2355+
HeapTupleHeader t_data = tuple->t_data;
2356+
TapeTupleHeader tapehdr;
2357+
unsigned int datalen;
2358+
unsigned int nullslen;
2359+
2360+
Assert(tuple->t_len >= t_data->t_hoff);
2361+
datalen = tuple->t_len - t_data->t_hoff;
2362+
if (HeapTupleHasNulls(tuple))
2363+
nullslen = BITMAPLEN(t_data->t_natts);
2364+
else
2365+
nullslen = 0;
2366+
tapehdr.tuplen = sizeof(TapeTupleHeader) + nullslen + datalen;
2367+
tapehdr.natts = t_data->t_natts;
2368+
tapehdr.infomask = t_data->t_infomask;
23422369
LogicalTapeWrite(state->tapeset, tapenum,
2343-
(void *) &tuplen, sizeof(tuplen));
2370+
(void *) &tapehdr, sizeof(tapehdr));
2371+
if (nullslen)
2372+
LogicalTapeWrite(state->tapeset, tapenum,
2373+
(void *) t_data->t_bits, nullslen);
23442374
LogicalTapeWrite(state->tapeset, tapenum,
2345-
(void *) tuple->t_data, tuple->t_len);
2375+
(char *) t_data + t_data->t_hoff, datalen);
23462376
if (state->randomAccess) /* need trailing length word? */
23472377
LogicalTapeWrite(state->tapeset, tapenum,
2348-
(void *) &tuplen, sizeof(tuplen));
2378+
(void *) &tapehdr.tuplen, sizeof(tapehdr.tuplen));
23492379

23502380
FREEMEM(state, GetMemoryChunkSpace(tuple));
23512381
heap_freetuple(tuple);
@@ -2355,22 +2385,59 @@ static void
23552385
readtup_heap(Tuplesortstate *state, SortTuple *stup,
23562386
int tapenum, unsigned int len)
23572387
{
2358-
unsigned int tuplen = len - sizeof(unsigned int) + HEAPTUPLESIZE;
2359-
HeapTuple tuple = (HeapTuple) palloc(tuplen);
2388+
TapeTupleHeader tapehdr;
2389+
unsigned int datalen;
2390+
unsigned int nullslen;
2391+
unsigned int hoff;
2392+
HeapTuple tuple;
2393+
HeapTupleHeader t_data;
23602394

2395+
/* read in the rest of the header */
2396+
if (LogicalTapeRead(state->tapeset, tapenum,
2397+
(char *) &tapehdr + sizeof(unsigned int),
2398+
sizeof(tapehdr) - sizeof(unsigned int)) !=
2399+
sizeof(tapehdr) - sizeof(unsigned int))
2400+
elog(ERROR, "unexpected end of data");
2401+
/* reconstruct lengths of null bitmap and data part */
2402+
if (tapehdr.infomask & HEAP_HASNULL)
2403+
nullslen = BITMAPLEN(tapehdr.natts);
2404+
else
2405+
nullslen = 0;
2406+
datalen = len - sizeof(TapeTupleHeader) - nullslen;
2407+
/* determine overhead size of tuple (should match heap_form_tuple) */
2408+
hoff = offsetof(HeapTupleHeaderData, t_bits) + nullslen;
2409+
if (tapehdr.infomask & HEAP_HASOID)
2410+
hoff += sizeof(Oid);
2411+
hoff = MAXALIGN(hoff);
2412+
/* Allocate the space in one chunk, like heap_form_tuple */
2413+
tuple = (HeapTuple) palloc(HEAPTUPLESIZE + hoff + datalen);
23612414
USEMEM(state, GetMemoryChunkSpace(tuple));
2362-
/* reconstruct the HeapTupleData portion */
2363-
tuple->t_len = len - sizeof(unsigned int);
2415+
t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
2416+
/* make sure unused header fields are zeroed */
2417+
MemSetAligned(t_data, 0, hoff);
2418+
/* reconstruct the HeapTupleData fields */
2419+
tuple->t_len = hoff + datalen;
23642420
ItemPointerSetInvalid(&(tuple->t_self));
23652421
tuple->t_tableOid = InvalidOid;
2366-
tuple->t_data = (HeapTupleHeader) (((char *) tuple) + HEAPTUPLESIZE);
2367-
/* read in the tuple proper */
2368-
if (LogicalTapeRead(state->tapeset, tapenum, (void *) tuple->t_data,
2369-
tuple->t_len) != tuple->t_len)
2422+
tuple->t_data = t_data;
2423+
/* reconstruct the HeapTupleHeaderData fields */
2424+
ItemPointerSetInvalid(&(t_data->t_ctid));
2425+
t_data->t_natts = tapehdr.natts;
2426+
t_data->t_infomask = (tapehdr.infomask & ~HEAP_XACT_MASK)
2427+
| (HEAP_XMIN_INVALID | HEAP_XMAX_INVALID);
2428+
t_data->t_hoff = hoff;
2429+
/* read in the null bitmap if any */
2430+
if (nullslen)
2431+
if (LogicalTapeRead(state->tapeset, tapenum,
2432+
(void *) t_data->t_bits, nullslen) != nullslen)
2433+
elog(ERROR, "unexpected end of data");
2434+
/* and the data proper */
2435+
if (LogicalTapeRead(state->tapeset, tapenum,
2436+
(char *) t_data + hoff, datalen) != datalen)
23702437
elog(ERROR, "unexpected end of data");
23712438
if (state->randomAccess) /* need trailing length word? */
2372-
if (LogicalTapeRead(state->tapeset, tapenum, (void *) &tuplen,
2373-
sizeof(tuplen)) != sizeof(tuplen))
2439+
if (LogicalTapeRead(state->tapeset, tapenum, (void *) &tapehdr.tuplen,
2440+
sizeof(tapehdr.tuplen)) != sizeof(tapehdr.tuplen))
23742441
elog(ERROR, "unexpected end of data");
23752442
stup->tuple = (void *) tuple;
23762443
/* set up first-column key value */

src/include/utils/tuplesort.h

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
1414
* Portions Copyright (c) 1994, Regents of the University of California
1515
*
16-
* $PostgreSQL: pgsql/src/include/utils/tuplesort.h,v 1.19 2006/03/05 15:59:08 momjian Exp $
16+
* $PostgreSQL: pgsql/src/include/utils/tuplesort.h,v 1.20 2006/05/23 21:37:59 tgl Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -32,7 +32,11 @@ typedef struct Tuplesortstate Tuplesortstate;
3232
* We provide two different interfaces to what is essentially the same
3333
* code: one for sorting HeapTuples and one for sorting IndexTuples.
3434
* They differ primarily in the way that the sort key information is
35-
* supplied.
35+
* supplied. Also, tuplesort.c guarantees to preserve all the header
36+
* fields of an IndexTuple, but when sorting HeapTuples only the user data
37+
* is guaranteed preserved, not the "system columns" (tuple identity and
38+
* transaction visibility info).
39+
*
3640
* Yet a third slightly different interface supports sorting bare Datums.
3741
*/
3842

0 commit comments

Comments
 (0)