@@ -190,7 +190,9 @@ static void _gin_parallel_scan_and_build(GinBuildState *buildstate,
190
190
Relation heap , Relation index ,
191
191
int sortmem , bool progress );
192
192
193
- static Datum _gin_parse_tuple (GinTuple * a , ItemPointerData * * items );
193
+ static ItemPointer _gin_parse_tuple_items (GinTuple * a );
194
+ static Datum _gin_parse_tuple_key (GinTuple * a );
195
+
194
196
static GinTuple * _gin_build_tuple (OffsetNumber attrnum , unsigned char category ,
195
197
Datum key , int16 typlen , bool typbyval ,
196
198
ItemPointerData * items , uint32 nitems ,
@@ -1365,7 +1367,8 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
1365
1367
1366
1368
AssertCheckGinBuffer (buffer );
1367
1369
1368
- key = _gin_parse_tuple (tup , & items );
1370
+ key = _gin_parse_tuple_key (tup );
1371
+ items = _gin_parse_tuple_items (tup );
1369
1372
1370
1373
/* if the buffer is empty, set the fields (and copy the key) */
1371
1374
if (GinBufferIsEmpty (buffer ))
@@ -1401,6 +1404,9 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
1401
1404
1402
1405
AssertCheckItemPointers (buffer );
1403
1406
}
1407
+
1408
+ /* free the decompressed TID list */
1409
+ pfree (items );
1404
1410
}
1405
1411
1406
1412
/*
@@ -1955,6 +1961,15 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
1955
1961
table_close (heapRel , heapLockmode );
1956
1962
}
1957
1963
1964
+ /*
1965
+ * Used to keep track of compressed TID lists when building a GIN tuple. Each entry wraps one segment returned by ginCompressPostingList; entries are chained in a dlist until the segments are copied into the GIN tuple and freed.
1966
+ */
1967
+ typedef struct
1968
+ {
1969
+ dlist_node node ; /* linked list pointers */
1970
+ GinPostingList * seg ; /* compressed segment produced by ginCompressPostingList */
1971
+ } GinSegmentInfo ;
1972
+
1958
1973
/*
1959
1974
* _gin_build_tuple
1960
1975
* Serialize the state for an index key into a tuple for tuplesort.
@@ -1967,6 +1982,11 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
1967
1982
* like endianness etc. We could make it a little bit smaller, but it's not
1968
1983
* worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
1969
1984
* start of the TID list anyway. So we wouldn't save anything.
1985
+ *
1986
+ * The TID list is serialized as compressed - it's highly compressible, and
1987
+ * we already have ginCompressPostingList for this purpose. The list may be
1988
+ * pretty long, so we compress it into multiple segments and then copy all
1989
+ * of that into the GIN tuple.
1970
1990
*/
1971
1991
static GinTuple *
1972
1992
_gin_build_tuple (OffsetNumber attrnum , unsigned char category ,
@@ -1980,6 +2000,11 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
1980
2000
Size tuplen ;
1981
2001
int keylen ;
1982
2002
2003
+ dlist_mutable_iter iter ;
2004
+ dlist_head segments ;
2005
+ int ncompressed ;
2006
+ Size compresslen ;
2007
+
1983
2008
/*
1984
2009
* Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
1985
2010
* have actual non-empty key. We include varlena headers and \0 bytes for
@@ -2006,12 +2031,34 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
2006
2031
else
2007
2032
elog (ERROR , "unexpected typlen value (%d)" , typlen );
2008
2033
2034
+ /* compress the item pointers */
2035
+ ncompressed = 0 ;
2036
+ compresslen = 0 ;
2037
+ dlist_init (& segments );
2038
+
2039
+ /* generate compressed segments of TID list chunks */
2040
+ while (ncompressed < nitems )
2041
+ {
2042
+ int cnt ;
2043
+ GinSegmentInfo * seginfo = palloc (sizeof (GinSegmentInfo ));
2044
+
2045
+ seginfo -> seg = ginCompressPostingList (& items [ncompressed ],
2046
+ (nitems - ncompressed ),
2047
+ UINT16_MAX ,
2048
+ & cnt );
2049
+
2050
+ ncompressed += cnt ;
2051
+ compresslen += SizeOfGinPostingList (seginfo -> seg );
2052
+
2053
+ dlist_push_tail (& segments , & seginfo -> node );
2054
+ }
2055
+
2009
2056
/*
2010
2057
* Determine GIN tuple length with all the data included. Be careful about
2011
- * alignment, to allow direct access to item pointers.
2058
+ * alignment, to allow direct access to compressed segments (those require
2059
+ * only SHORTALIGN).
2012
2060
*/
2013
- tuplen = SHORTALIGN (offsetof(GinTuple , data ) + keylen ) +
2014
- (sizeof (ItemPointerData ) * nitems );
2061
+ tuplen = SHORTALIGN (offsetof(GinTuple , data ) + keylen ) + compresslen ;
2015
2062
2016
2063
* len = tuplen ;
2017
2064
@@ -2061,37 +2108,40 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
2061
2108
/* finally, find where the TID data starts (right after the aligned key) */
2062
2109
ptr = (char * ) tuple + SHORTALIGN (offsetof(GinTuple , data ) + keylen );
2063
2110
2064
- memcpy (ptr , items , sizeof (ItemPointerData ) * nitems );
2111
+ /* copy in the compressed data, and free the segments */
2112
+ dlist_foreach_modify (iter , & segments )
2113
+ {
2114
+ GinSegmentInfo * seginfo = dlist_container (GinSegmentInfo , node , iter .cur );
2115
+
2116
+ memcpy (ptr , seginfo -> seg , SizeOfGinPostingList (seginfo -> seg ));
2117
+
2118
+ ptr += SizeOfGinPostingList (seginfo -> seg );
2119
+
2120
+ dlist_delete (& seginfo -> node );
2121
+
2122
+ pfree (seginfo -> seg );
2123
+ pfree (seginfo );
2124
+ }
2065
2125
2066
2126
return tuple ;
2067
2127
}
2068
2128
2069
2129
/*
2070
- * _gin_parse_tuple
2071
- * Deserialize the tuple from the tuplestore representation .
2130
+ * _gin_parse_tuple_key
2131
+ * Return a Datum representing the key stored in the tuple .
2072
2132
*
2073
- * Most of the fields are actually directly accessible, the only thing that
2133
+ * Most of the tuple fields are directly accessible, the only thing that
2074
2134
* needs more care is the key and the TID list.
2075
2135
*
2076
2136
* For the key, this returns a regular Datum representing it. It's either the
2077
2137
* actual key value, or a pointer to the beginning of the data array (which is
2078
2138
* where the data was copied by _gin_build_tuple).
2079
- *
2080
- * The pointer to the TID list is returned through 'items' (which is simply
2081
- * a pointer to the data array).
2082
2139
*/
2083
2140
static Datum
2084
- _gin_parse_tuple (GinTuple * a , ItemPointerData * * items )
2141
+ _gin_parse_tuple_key (GinTuple * a )
2085
2142
{
2086
2143
Datum key ;
2087
2144
2088
- if (items )
2089
- {
2090
- char * ptr = (char * ) a + SHORTALIGN (offsetof(GinTuple , data ) + a -> keylen );
2091
-
2092
- * items = (ItemPointerData * ) ptr ;
2093
- }
2094
-
2095
2145
if (a -> category != GIN_CAT_NORM_KEY )
2096
2146
return (Datum ) 0 ;
2097
2147
@@ -2104,6 +2154,28 @@ _gin_parse_tuple(GinTuple *a, ItemPointerData **items)
2104
2154
return PointerGetDatum (a -> data );
2105
2155
}
2106
2156
2157
+ /*
2158
+ * _gin_parse_tuple_items
2159
+ * Return a palloc'd array holding the decompressed TID list of the tuple. The compressed segments are read from the area following the SHORTALIGN'd key; the caller is expected to pfree the result (as GinBufferStoreTuple does).
2160
+ */
2161
+ static ItemPointer
2162
+ _gin_parse_tuple_items (GinTuple * a )
2163
+ {
2164
+ int len ; /* bytes of tuple data following the aligned key */
2165
+ char * ptr ; /* start of the compressed TID segments */
2166
+ int ndecoded ; /* TID count reported by the decoder */
2167
+ ItemPointer items ;
2168
+
2169
+ len = a -> tuplen - SHORTALIGN (offsetof(GinTuple , data ) + a -> keylen ); /* everything past the aligned key is treated as compressed TID data */
2170
+ ptr = (char * ) a + SHORTALIGN (offsetof(GinTuple , data ) + a -> keylen ); /* same offset _gin_build_tuple copies the segments to */
2171
+
2172
+ items = ginPostingListDecodeAllSegments ((GinPostingList * ) ptr , len , & ndecoded ); /* decode every segment into one TID array */
2173
+
2174
+ Assert (ndecoded == a -> nitems ); /* decoded count must match the count stored in the tuple */
2175
+
2176
+ return (ItemPointer ) items ;
2177
+ }
2178
+
2107
2179
/*
2108
2180
* _gin_compare_tuples
2109
2181
* Compare GIN tuples, used by tuplesort during parallel index build.
@@ -2139,8 +2211,8 @@ _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
2139
2211
2140
2212
if (a -> category == GIN_CAT_NORM_KEY )
2141
2213
{
2142
- keya = _gin_parse_tuple ( a , NULL );
2143
- keyb = _gin_parse_tuple ( b , NULL );
2214
+ keya = _gin_parse_tuple_key ( a );
2215
+ keyb = _gin_parse_tuple_key ( b );
2144
2216
2145
2217
r = ApplySortComparator (keya , false,
2146
2218
keyb , false,
0 commit comments