Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0b2a45a

Browse files
committed
Compress TID lists when writing GIN tuples to disk
When serializing GIN tuples to tuplesorts during parallel index builds, we can significantly reduce the amount of data by compressing the TID lists. The GIN opclasses may produce a lot of data (depending on how many keys are extracted from each row), and the TID compression is very efficient and effective. If the number of distinct keys is high, the first worker pass (reading data from the table and writing them into a private tuplesort) may not benefit from the compression very much. It is likely to spill data to disk before the TID lists get long enough for the compression to help. The second pass (writing the merged data into the shared tuplesort) is more likely to benefit from compression. The compression can be seen as a way to reduce the amount of disk space needed by the parallel builds, because the data is written twice. First into the per-worker tuplesorts, then into the shared tuplesort. Author: Tomas Vondra Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
1 parent 9b4bdf8 commit 0b2a45a

File tree

2 files changed

+95
-22
lines changed

2 files changed

+95
-22
lines changed

src/backend/access/gin/gininsert.c

Lines changed: 94 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,9 @@ static void _gin_parallel_scan_and_build(GinBuildState *buildstate,
190190
Relation heap, Relation index,
191191
int sortmem, bool progress);
192192

193-
static Datum _gin_parse_tuple(GinTuple *a, ItemPointerData **items);
193+
static ItemPointer _gin_parse_tuple_items(GinTuple *a);
194+
static Datum _gin_parse_tuple_key(GinTuple *a);
195+
194196
static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
195197
Datum key, int16 typlen, bool typbyval,
196198
ItemPointerData *items, uint32 nitems,
@@ -1365,7 +1367,8 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
13651367

13661368
AssertCheckGinBuffer(buffer);
13671369

1368-
key = _gin_parse_tuple(tup, &items);
1370+
key = _gin_parse_tuple_key(tup);
1371+
items = _gin_parse_tuple_items(tup);
13691372

13701373
/* if the buffer is empty, set the fields (and copy the key) */
13711374
if (GinBufferIsEmpty(buffer))
@@ -1401,6 +1404,9 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
14011404

14021405
AssertCheckItemPointers(buffer);
14031406
}
1407+
1408+
/* free the decompressed TID list */
1409+
pfree(items);
14041410
}
14051411

14061412
/*
@@ -1955,6 +1961,15 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
19551961
table_close(heapRel, heapLockmode);
19561962
}
19571963

1964+
/*
1965+
* Used to keep track of compressed TID lists when building a GIN tuple.
1966+
*/
1967+
typedef struct
1968+
{
1969+
dlist_node node; /* linked list pointers */
1970+
GinPostingList *seg;
1971+
} GinSegmentInfo;
1972+
19581973
/*
19591974
* _gin_build_tuple
19601975
* Serialize the state for an index key into a tuple for tuplesort.
@@ -1967,6 +1982,11 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
19671982
* like endianess etc. We could make it a little bit smaller, but it's not
19681983
* worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
19691984
* start of the TID list anyway. So we wouldn't save anything.
1985+
*
1986+
* The TID list is serialized as compressed - it's highly compressible, and
1987+
* we already have ginCompressPostingList for this purpose. The list may be
1988+
* pretty long, so we compress it into multiple segments and then copy all
1989+
* of that into the GIN tuple.
19701990
*/
19711991
static GinTuple *
19721992
_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
@@ -1980,6 +2000,11 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
19802000
Size tuplen;
19812001
int keylen;
19822002

2003+
dlist_mutable_iter iter;
2004+
dlist_head segments;
2005+
int ncompressed;
2006+
Size compresslen;
2007+
19832008
/*
19842009
* Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
19852010
* have actual non-empty key. We include varlena headers and \0 bytes for
@@ -2006,12 +2031,34 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
20062031
else
20072032
elog(ERROR, "unexpected typlen value (%d)", typlen);
20082033

2034+
/* compress the item pointers */
2035+
ncompressed = 0;
2036+
compresslen = 0;
2037+
dlist_init(&segments);
2038+
2039+
/* generate compressed segments of TID list chunks */
2040+
while (ncompressed < nitems)
2041+
{
2042+
int cnt;
2043+
GinSegmentInfo *seginfo = palloc(sizeof(GinSegmentInfo));
2044+
2045+
seginfo->seg = ginCompressPostingList(&items[ncompressed],
2046+
(nitems - ncompressed),
2047+
UINT16_MAX,
2048+
&cnt);
2049+
2050+
ncompressed += cnt;
2051+
compresslen += SizeOfGinPostingList(seginfo->seg);
2052+
2053+
dlist_push_tail(&segments, &seginfo->node);
2054+
}
2055+
20092056
/*
20102057
* Determine GIN tuple length with all the data included. Be careful about
2011-
* alignment, to allow direct access to item pointers.
2058+
* alignment, to allow direct access to compressed segments (those require
2059+
* only SHORTALIGN).
20122060
*/
2013-
tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) +
2014-
(sizeof(ItemPointerData) * nitems);
2061+
tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
20152062

20162063
*len = tuplen;
20172064

@@ -2061,37 +2108,40 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
20612108
/* finally, copy the TIDs into the array */
20622109
ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
20632110

2064-
memcpy(ptr, items, sizeof(ItemPointerData) * nitems);
2111+
/* copy in the compressed data, and free the segments */
2112+
dlist_foreach_modify(iter, &segments)
2113+
{
2114+
GinSegmentInfo *seginfo = dlist_container(GinSegmentInfo, node, iter.cur);
2115+
2116+
memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
2117+
2118+
ptr += SizeOfGinPostingList(seginfo->seg);
2119+
2120+
dlist_delete(&seginfo->node);
2121+
2122+
pfree(seginfo->seg);
2123+
pfree(seginfo);
2124+
}
20652125

20662126
return tuple;
20672127
}
20682128

20692129
/*
2070-
* _gin_parse_tuple
2071-
* Deserialize the tuple from the tuplestore representation.
2130+
* _gin_parse_tuple_key
2131+
* Return a Datum representing the key stored in the tuple.
20722132
*
2073-
* Most of the fields are actually directly accessible, the only thing that
2133+
* Most of the tuple fields are directly accessible, the only thing that
20742134
* needs more care is the key and the TID list.
20752135
*
20762136
* For the key, this returns a regular Datum representing it. It's either the
20772137
* actual key value, or a pointer to the beginning of the data array (which is
20782138
* where the data was copied by _gin_build_tuple).
2079-
*
2080-
* The pointer to the TID list is returned through 'items' (which is simply
2081-
* a pointer to the data array).
20822139
*/
20832140
static Datum
2084-
_gin_parse_tuple(GinTuple *a, ItemPointerData **items)
2141+
_gin_parse_tuple_key(GinTuple *a)
20852142
{
20862143
Datum key;
20872144

2088-
if (items)
2089-
{
2090-
char *ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2091-
2092-
*items = (ItemPointerData *) ptr;
2093-
}
2094-
20952145
if (a->category != GIN_CAT_NORM_KEY)
20962146
return (Datum) 0;
20972147

@@ -2104,6 +2154,28 @@ _gin_parse_tuple(GinTuple *a, ItemPointerData **items)
21042154
return PointerGetDatum(a->data);
21052155
}
21062156

2157+
/*
2158+
* _gin_parse_tuple_items
2159+
* Return a pointer to a palloc'd array of decompressed TID array.
2160+
*/
2161+
static ItemPointer
2162+
_gin_parse_tuple_items(GinTuple *a)
2163+
{
2164+
int len;
2165+
char *ptr;
2166+
int ndecoded;
2167+
ItemPointer items;
2168+
2169+
len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2170+
ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2171+
2172+
items = ginPostingListDecodeAllSegments((GinPostingList *) ptr, len, &ndecoded);
2173+
2174+
Assert(ndecoded == a->nitems);
2175+
2176+
return (ItemPointer) items;
2177+
}
2178+
21072179
/*
21082180
* _gin_compare_tuples
21092181
* Compare GIN tuples, used by tuplesort during parallel index build.
@@ -2139,8 +2211,8 @@ _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
21392211

21402212
if (a->category == GIN_CAT_NORM_KEY)
21412213
{
2142-
keya = _gin_parse_tuple(a, NULL);
2143-
keyb = _gin_parse_tuple(b, NULL);
2214+
keya = _gin_parse_tuple_key(a);
2215+
keyb = _gin_parse_tuple_key(b);
21442216

21452217
r = ApplySortComparator(keya, false,
21462218
keyb, false,

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,7 @@ GinScanEntry
10521052
GinScanKey
10531053
GinScanOpaque
10541054
GinScanOpaqueData
1055+
GinSegmentInfo
10551056
GinState
10561057
GinStatsData
10571058
GinTernaryValue

0 commit comments

Comments
 (0)