Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1ebdec8

Browse files
committed
Rethink handling of pass-by-value leaf datums in SP-GiST.
The existing convention in SP-GiST is that any pass-by-value datatype is stored in Datum representation, i.e. it's of width sizeof(Datum) even when typlen is less than that. This is okay, or at least it's too late to change it, for prefix datums and node-label datums in inner (upper) tuples. But it's problematic for leaf datums, because we'd prefer those to be stored in Postgres' standard on-disk representation so that we can easily extend leaf tuples to carry additional "included" columns. I believe, however, that we can get away with just up and changing that. This would be an unacceptable on-disk-format break, but there are two big mitigating factors: 1. It seems quite unlikely that there are any SP-GiST opclasses out there that use pass-by-value leaf datatypes. Certainly none of the ones in core do, nor has codesearch.debian.net heard of any. Given what SP-GiST is good for, it's hard to conceive of a use-case where the leaf-level values would be both small and fixed-width. (As an example, if you wanted to index text values with the leaf level being just a byte, then every text string would have to be represented with one level of inner tuple per preceding byte, which would be horrendously space-inefficient and slow to access. You always want to use as few inner-tuple levels as possible, leaving as much as possible in the leaf values.) 2. Even granting that you have such an index, this change only breaks things on big-endian machines. On little-endian, the high order bytes of the Datum format will now just appear to be alignment padding space. So, change the code to store pass-by-value leaf datums in their usual on-disk form. Inner-tuple datums are not touched. This is extracted from a larger patch that intends to add support for "included" columns. I'm committing it separately for visibility in our commit logs. Pavel Borisov and Tom Lane, reviewed by Andrey Borodin Discussion: https://postgr.es/m/CALT9ZEFi-vMp4faht9f9Junb1nO3NOSjhpxTmbm1UGLMsLqiEQ@mail.gmail.com
1 parent c9c41c7 commit 1ebdec8

File tree

3 files changed

+94
-49
lines changed

3 files changed

+94
-49
lines changed

src/backend/access/spgist/spgdoinsert.c

+21-20
Original file line numberDiff line numberDiff line change
@@ -731,13 +731,6 @@ doPickSplit(Relation index, SpGistState *state,
731731
* also, count up the amount of space that will be freed from current.
732732
* (Note that in the non-root case, we won't actually delete the old
733733
* tuples, only replace them with redirects or placeholders.)
734-
*
735-
* Note: the SGLTDATUM calls here are safe even when dealing with a nulls
736-
* page. For a pass-by-value data type we will fetch a word that must
737-
* exist even though it may contain garbage (because of the fact that leaf
738-
* tuples must have size at least SGDTSIZE). For a pass-by-reference type
739-
* we are just computing a pointer that isn't going to get dereferenced.
740-
* So it's not worth guarding the calls with isNulls checks.
741734
*/
742735
nToInsert = 0;
743736
nToDelete = 0;
@@ -757,7 +750,8 @@ doPickSplit(Relation index, SpGistState *state,
757750
PageGetItemId(current->page, i));
758751
if (it->tupstate == SPGIST_LIVE)
759752
{
760-
in.datums[nToInsert] = SGLTDATUM(it, state);
753+
in.datums[nToInsert] =
754+
isNulls ? (Datum) 0 : SGLTDATUM(it, state);
761755
heapPtrs[nToInsert] = it->heapPtr;
762756
nToInsert++;
763757
toDelete[nToDelete] = i;
@@ -782,7 +776,8 @@ doPickSplit(Relation index, SpGistState *state,
782776
PageGetItemId(current->page, i));
783777
if (it->tupstate == SPGIST_LIVE)
784778
{
785-
in.datums[nToInsert] = SGLTDATUM(it, state);
779+
in.datums[nToInsert] =
780+
isNulls ? (Datum) 0 : SGLTDATUM(it, state);
786781
heapPtrs[nToInsert] = it->heapPtr;
787782
nToInsert++;
788783
toDelete[nToDelete] = i;
@@ -814,7 +809,8 @@ doPickSplit(Relation index, SpGistState *state,
814809
* space to include it; and in any case it has to be included in the input
815810
* for the picksplit function. So don't increment nToInsert yet.
816811
*/
817-
in.datums[in.nTuples] = SGLTDATUM(newLeafTuple, state);
812+
in.datums[in.nTuples] =
813+
isNulls ? (Datum) 0 : SGLTDATUM(newLeafTuple, state);
818814
heapPtrs[in.nTuples] = newLeafTuple->heapPtr;
819815
in.nTuples++;
820816

@@ -1940,17 +1936,21 @@ spgdoinsert(Relation index, SpGistState *state,
19401936
leafDatum = (Datum) 0;
19411937

19421938
/*
1943-
* Compute space needed for a leaf tuple containing the given datum.
1944-
*
1945-
* If it isn't gonna fit, and the opclass can't reduce the datum size by
1946-
* suffixing, bail out now rather than getting into an endless loop.
1939+
* Compute space needed for a leaf tuple containing the given datum. This
1940+
* must match spgFormLeafTuple.
19471941
*/
1942+
leafSize = SGLTHDRSZ;
19481943
if (!isnull)
1949-
leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
1950-
SpGistGetTypeSize(&state->attLeafType, leafDatum);
1951-
else
1952-
leafSize = SGDTSIZE + sizeof(ItemIdData);
1944+
leafSize += SpGistGetLeafTypeSize(&state->attLeafType, leafDatum);
1945+
if (leafSize < SGDTSIZE)
1946+
leafSize = SGDTSIZE;
1947+
/* Account for an item pointer, too */
1948+
leafSize += sizeof(ItemIdData);
19531949

1950+
/*
1951+
* If it isn't gonna fit, and the opclass can't reduce the datum size by
1952+
* suffixing, bail out now rather than getting into an endless loop.
1953+
*/
19541954
if (leafSize > SPGIST_PAGE_CAPACITY && !state->config.longValuesOK)
19551955
ereport(ERROR,
19561956
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
@@ -2161,8 +2161,9 @@ spgdoinsert(Relation index, SpGistState *state,
21612161
if (!isnull)
21622162
{
21632163
leafDatum = out.result.matchNode.restDatum;
2164-
leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
2165-
SpGistGetTypeSize(&state->attLeafType, leafDatum);
2164+
leafSize = SGLTHDRSZ +
2165+
SpGistGetLeafTypeSize(&state->attLeafType, leafDatum);
2166+
leafSize += sizeof(ItemIdData);
21662167
}
21672168

21682169
/*

src/backend/access/spgist/spgutils.c

+50-12
Original file line numberDiff line numberDiff line change
@@ -603,13 +603,14 @@ spgoptions(Datum reloptions, bool validate)
603603
}
604604

605605
/*
606-
* Get the space needed to store a non-null datum of the indicated type.
606+
* Get the space needed to store a non-null datum of the indicated type
607+
* in an inner tuple (that is, as a prefix or node label).
607608
* Note the result is already rounded up to a MAXALIGN boundary.
608-
* Also, we follow the SPGiST convention that pass-by-val types are
609-
* just stored in their Datum representation (compare memcpyDatum).
609+
* Here we follow the convention that pass-by-val types are just stored
610+
* in their Datum representation (compare memcpyInnerDatum).
610611
*/
611612
unsigned int
612-
SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum)
613+
SpGistGetInnerTypeSize(SpGistTypeDesc *att, Datum datum)
613614
{
614615
unsigned int size;
615616

@@ -624,10 +625,28 @@ SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum)
624625
}
625626

626627
/*
627-
* Copy the given non-null datum to *target
628+
* Get the space needed to store a non-null datum of the indicated type
629+
* in a leaf tuple. This is just the usual storage space for the type,
630+
* but rounded up to a MAXALIGN boundary.
631+
*/
632+
unsigned int
633+
SpGistGetLeafTypeSize(SpGistTypeDesc *att, Datum datum)
634+
{
635+
unsigned int size;
636+
637+
if (att->attlen > 0)
638+
size = att->attlen;
639+
else
640+
size = VARSIZE_ANY(datum);
641+
642+
return MAXALIGN(size);
643+
}
644+
645+
/*
646+
* Copy the given non-null datum to *target, in the inner-tuple case
628647
*/
629648
static void
630-
memcpyDatum(void *target, SpGistTypeDesc *att, Datum datum)
649+
memcpyInnerDatum(void *target, SpGistTypeDesc *att, Datum datum)
631650
{
632651
unsigned int size;
633652

@@ -642,6 +661,25 @@ memcpyDatum(void *target, SpGistTypeDesc *att, Datum datum)
642661
}
643662
}
644663

664+
/*
665+
* Copy the given non-null datum to *target, in the leaf-tuple case
666+
*/
667+
static void
668+
memcpyLeafDatum(void *target, SpGistTypeDesc *att, Datum datum)
669+
{
670+
unsigned int size;
671+
672+
if (att->attbyval)
673+
{
674+
store_att_byval(target, datum, att->attlen);
675+
}
676+
else
677+
{
678+
size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum);
679+
memcpy(target, DatumGetPointer(datum), size);
680+
}
681+
}
682+
645683
/*
646684
* Construct a leaf tuple containing the given heap TID and datum value
647685
*/
@@ -655,7 +693,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
655693
/* compute space needed (note result is already maxaligned) */
656694
size = SGLTHDRSZ;
657695
if (!isnull)
658-
size += SpGistGetTypeSize(&state->attLeafType, datum);
696+
size += SpGistGetLeafTypeSize(&state->attLeafType, datum);
659697

660698
/*
661699
* Ensure that we can replace the tuple with a dead tuple later. This
@@ -671,7 +709,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
671709
tup->nextOffset = InvalidOffsetNumber;
672710
tup->heapPtr = *heapPtr;
673711
if (!isnull)
674-
memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
712+
memcpyLeafDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
675713

676714
return tup;
677715
}
@@ -692,7 +730,7 @@ spgFormNodeTuple(SpGistState *state, Datum label, bool isnull)
692730
/* compute space needed (note result is already maxaligned) */
693731
size = SGNTHDRSZ;
694732
if (!isnull)
695-
size += SpGistGetTypeSize(&state->attLabelType, label);
733+
size += SpGistGetInnerTypeSize(&state->attLabelType, label);
696734

697735
/*
698736
* Here we make sure that the size will fit in the field reserved for it
@@ -716,7 +754,7 @@ spgFormNodeTuple(SpGistState *state, Datum label, bool isnull)
716754
ItemPointerSetInvalid(&tup->t_tid);
717755

718756
if (!isnull)
719-
memcpyDatum(SGNTDATAPTR(tup), &state->attLabelType, label);
757+
memcpyInnerDatum(SGNTDATAPTR(tup), &state->attLabelType, label);
720758

721759
return tup;
722760
}
@@ -736,7 +774,7 @@ spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
736774

737775
/* Compute size needed */
738776
if (hasPrefix)
739-
prefixSize = SpGistGetTypeSize(&state->attPrefixType, prefix);
777+
prefixSize = SpGistGetInnerTypeSize(&state->attPrefixType, prefix);
740778
else
741779
prefixSize = 0;
742780

@@ -781,7 +819,7 @@ spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
781819
tup->size = size;
782820

783821
if (hasPrefix)
784-
memcpyDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);
822+
memcpyInnerDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);
785823

786824
ptr = (char *) SGITNODEPTR(tup);
787825

src/include/access/spgist_private.h

+23-17
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,14 @@ typedef struct SpGistCache
266266
* header/optional prefix/array of nodes, which are SpGistNodeTuples
267267
*
268268
* size and prefixSize must be multiples of MAXALIGN
269+
*
270+
* If the prefix datum is of a pass-by-value type, it is stored in its
271+
* Datum representation, that is its on-disk representation is of length
272+
* sizeof(Datum). This is a fairly unfortunate choice, because in no other
273+
* place does Postgres use Datum as an on-disk representation; it creates
274+
* an unnecessary incompatibility between 32-bit and 64-bit builds. But the
275+
* compatibility loss is mostly theoretical since MAXIMUM_ALIGNOF typically
276+
* differs between such builds, too. Anyway we're stuck with it now.
269277
*/
270278
typedef struct SpGistInnerTupleData
271279
{
@@ -307,8 +315,7 @@ typedef SpGistInnerTupleData *SpGistInnerTuple;
307315
* Node tuples use the same header as ordinary Postgres IndexTuples, but
308316
* we do not use a null bitmap, because we know there is only one column
309317
* so the INDEX_NULL_MASK bit suffices. Also, pass-by-value datums are
310-
* stored as a full Datum, the same convention as for inner tuple prefixes
311-
* and leaf tuple datums.
318+
* stored in Datum form, the same convention as for inner tuple prefixes.
312319
*/
313320

314321
typedef IndexTupleData SpGistNodeTupleData;
@@ -322,12 +329,15 @@ typedef SpGistNodeTupleData *SpGistNodeTuple;
322329
PointerGetDatum(SGNTDATAPTR(x)))
323330

324331
/*
325-
* SPGiST leaf tuple: carries a datum and a heap tuple TID
332+
* SPGiST leaf tuple: carries a leaf datum and a heap tuple TID
326333
*
327-
* In the simplest case, the datum is the same as the indexed value; but
328-
* it could also be a suffix or some other sort of delta that permits
334+
* In the simplest case, the leaf datum is the same as the indexed value;
335+
* but it could also be a suffix or some other sort of delta that permits
329336
* reconstruction given knowledge of the prefix path traversed to get here.
330337
*
338+
* If the leaf datum is NULL, it's not stored. This is not represented
339+
* explicitly; we infer it from the tuple being stored on a nulls page.
340+
*
331341
* The size field is wider than could possibly be needed for an on-disk leaf
332342
* tuple, but this allows us to form leaf tuples even when the datum is too
333343
* wide to be stored immediately, and it costs nothing because of alignment
@@ -339,30 +349,25 @@ typedef SpGistNodeTupleData *SpGistNodeTuple;
339349
*
340350
* size must be a multiple of MAXALIGN; also, it must be at least SGDTSIZE
341351
* so that the tuple can be converted to REDIRECT status later. (This
342-
* restriction only adds bytes for the null-datum case, otherwise alignment
343-
* restrictions force it anyway.)
344-
*
345-
* In a leaf tuple for a NULL indexed value, there's no useful datum value;
346-
* however, the SGDTSIZE limit ensures that's there's a Datum word there
347-
* anyway, so SGLTDATUM can be applied safely as long as you don't do
348-
* anything with the result.
352+
* restriction only adds bytes for a NULL leaf datum stored on a 32-bit
353+
* machine; otherwise alignment restrictions force it anyway.)
349354
*/
350355
typedef struct SpGistLeafTupleData
351356
{
352357
unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */
353358
size:30; /* large enough for any palloc'able value */
354359
OffsetNumber nextOffset; /* next tuple in chain, or InvalidOffsetNumber */
355360
ItemPointerData heapPtr; /* TID of represented heap tuple */
356-
/* leaf datum follows */
361+
/* leaf datum follows on a MAXALIGN boundary */
357362
} SpGistLeafTupleData;
358363

359364
typedef SpGistLeafTupleData *SpGistLeafTuple;
360365

361366
#define SGLTHDRSZ MAXALIGN(sizeof(SpGistLeafTupleData))
362367
#define SGLTDATAPTR(x) (((char *) (x)) + SGLTHDRSZ)
363-
#define SGLTDATUM(x, s) ((s)->attLeafType.attbyval ? \
364-
*(Datum *) SGLTDATAPTR(x) : \
365-
PointerGetDatum(SGLTDATAPTR(x)))
368+
#define SGLTDATUM(x, s) fetch_att(SGLTDATAPTR(x), \
369+
(s)->attLeafType.attbyval, \
370+
(s)->attLeafType.attlen)
366371

367372
/*
368373
* SPGiST dead tuple: declaration for examining non-live tuples
@@ -455,7 +460,8 @@ extern void SpGistSetLastUsedPage(Relation index, Buffer buffer);
455460
extern void SpGistInitPage(Page page, uint16 f);
456461
extern void SpGistInitBuffer(Buffer b, uint16 f);
457462
extern void SpGistInitMetapage(Page page);
458-
extern unsigned int SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum);
463+
extern unsigned int SpGistGetInnerTypeSize(SpGistTypeDesc *att, Datum datum);
464+
extern unsigned int SpGistGetLeafTypeSize(SpGistTypeDesc *att, Datum datum);
459465
extern SpGistLeafTuple spgFormLeafTuple(SpGistState *state,
460466
ItemPointer heapPtr,
461467
Datum datum, bool isnull);

0 commit comments

Comments
 (0)