Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 30efc78

Browse files
nikhilveldandaCommitfest Bot
authored and
Commitfest Bot
committed
zstd nodict compression
1 parent 1a417dd commit 30efc78

22 files changed

+644
-24
lines changed

contrib/amcheck/verify_heapam.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1792,6 +1792,7 @@ check_tuple_attribute(HeapCheckContext *ctx)
17921792
/* List of all valid compression method IDs */
17931793
case TOAST_PGLZ_COMPRESSION_ID:
17941794
case TOAST_LZ4_COMPRESSION_ID:
1795+
case TOAST_ZSTD_COMPRESSION_ID:
17951796
valid = true;
17961797
break;
17971798

src/backend/access/common/detoast.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,10 +246,10 @@ detoast_attr_slice(struct varlena *attr,
246246
* Determine maximum amount of compressed data needed for a prefix
247247
* of a given length (after decompression).
248248
*
249-
* At least for now, if it's LZ4 data, we'll have to fetch the
250-
* whole thing, because there doesn't seem to be an API call to
251-
* determine how much compressed data we need to be sure of being
252-
* able to decompress the required slice.
249+
* At least for now, if it's LZ4 or Zstandard data, we'll have to
250+
* fetch the whole thing, because there doesn't seem to be an API
251+
* call to determine how much compressed data we need to be sure
252+
* of being able to decompress the required slice.
253253
*/
254254
if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
255255
TOAST_PGLZ_COMPRESSION_ID)
@@ -485,6 +485,8 @@ toast_decompress_datum(struct varlena *attr)
485485
return pglz_decompress_datum(attr);
486486
case TOAST_LZ4_COMPRESSION_ID:
487487
return lz4_decompress_datum(attr);
488+
case TOAST_ZSTD_COMPRESSION_ID:
489+
return zstd_decompress_datum(attr);
488490
default:
489491
elog(ERROR, "invalid compression method id %d", cmid);
490492
return NULL; /* keep compiler quiet */
@@ -528,6 +530,8 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
528530
return pglz_decompress_datum_slice(attr, slicelength);
529531
case TOAST_LZ4_COMPRESSION_ID:
530532
return lz4_decompress_datum_slice(attr, slicelength);
533+
case TOAST_ZSTD_COMPRESSION_ID:
534+
return zstd_decompress_datum_slice(attr, slicelength);
531535
default:
532536
elog(ERROR, "invalid compression method id %d", cmid);
533537
return NULL; /* keep compiler quiet */

src/backend/access/common/reloptions.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "access/nbtree.h"
2525
#include "access/reloptions.h"
2626
#include "access/spgist_private.h"
27+
#include "access/toast_compression.h"
2728
#include "catalog/pg_type.h"
2829
#include "commands/defrem.h"
2930
#include "commands/tablespace.h"
@@ -381,7 +382,15 @@ static relopt_int intRelOpts[] =
381382
},
382383
-1, 0, 1024
383384
},
384-
385+
{
386+
{
387+
"zstd_level",
388+
"Set column's ZSTD compression level",
389+
RELOPT_KIND_ATTRIBUTE,
390+
ShareUpdateExclusiveLock
391+
},
392+
DEFAULT_ZSTD_LEVEL, MIN_ZSTD_LEVEL, MAX_ZSTD_LEVEL
393+
},
385394
/* list terminator */
386395
{{NULL}}
387396
};
@@ -2097,7 +2106,8 @@ attribute_reloptions(Datum reloptions, bool validate)
20972106
{
20982107
static const relopt_parse_elt tab[] = {
20992108
{"n_distinct", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct)},
2100-
{"n_distinct_inherited", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct_inherited)}
2109+
{"n_distinct_inherited", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct_inherited)},
2110+
{"zstd_level", RELOPT_TYPE_INT, offsetof(AttributeOpts, zstd_level)},
21012111
};
21022112

21032113
return (bytea *) build_reloptions(reloptions, validate,

src/backend/access/common/toast_compression.c

Lines changed: 184 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
#include <lz4.h>
1818
#endif
1919

20+
#ifdef USE_ZSTD
21+
#include <zstd.h>
22+
#endif
23+
2024
#include "access/detoast.h"
2125
#include "access/toast_compression.h"
2226
#include "common/pg_lzcompress.h"
@@ -26,11 +30,19 @@
2630
/* GUC */
2731
int default_toast_compression = TOAST_PGLZ_COMPRESSION;
2832

29-
#define NO_LZ4_SUPPORT() \
33+
#ifdef USE_ZSTD
34+
#define ZSTD_CHECK_ERROR(zstd_ret, msg) \
35+
do { \
36+
if (ZSTD_isError(zstd_ret)) \
37+
ereport(ERROR, (errmsg("%s: %s", (msg), ZSTD_getErrorName(zstd_ret)))); \
38+
} while (0)
39+
#endif
40+
41+
#define COMPRESSION_METHOD_NOT_SUPPORTED(method) \
3042
ereport(ERROR, \
3143
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
32-
errmsg("compression method lz4 not supported"), \
33-
errdetail("This functionality requires the server to be built with lz4 support.")))
44+
errmsg("compression method %s not supported", method), \
45+
errdetail("This functionality requires the server to be built with %s support.", method)))
3446

3547
/*
3648
* Compress a varlena using PGLZ.
@@ -140,7 +152,7 @@ struct varlena *
140152
lz4_compress_datum(const struct varlena *value)
141153
{
142154
#ifndef USE_LZ4
143-
NO_LZ4_SUPPORT();
155+
COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
144156
return NULL; /* keep compiler quiet */
145157
#else
146158
int32 valsize;
@@ -183,7 +195,7 @@ struct varlena *
183195
lz4_decompress_datum(const struct varlena *value)
184196
{
185197
#ifndef USE_LZ4
186-
NO_LZ4_SUPPORT();
198+
COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
187199
return NULL; /* keep compiler quiet */
188200
#else
189201
int32 rawsize;
@@ -216,7 +228,7 @@ struct varlena *
216228
lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
217229
{
218230
#ifndef USE_LZ4
219-
NO_LZ4_SUPPORT();
231+
COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
220232
return NULL; /* keep compiler quiet */
221233
#else
222234
int32 rawsize;
@@ -246,6 +258,153 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
246258
#endif
247259
}
248260

261+
/* Compress datum using ZSTD */
262+
struct varlena *
263+
zstd_compress_datum(const struct varlena *value, CompressionInfo cmp)
264+
{
265+
#ifdef USE_ZSTD
266+
uint32 valsize = VARSIZE_ANY_EXHDR(value);
267+
size_t max_size = ZSTD_compressBound(valsize);
268+
struct varlena *compressed;
269+
size_t cmp_size;
270+
271+
if (!cmp.meta) /* ZSTD no dictionary */
272+
{
273+
/* Allocate space for the compressed varlena (header + data) */
274+
compressed = (struct varlena *) palloc(max_size + VARHDRSZ_4BCE);
275+
276+
cmp_size = ZSTD_compress(VARDATA_4BCE(compressed),
277+
max_size,
278+
VARDATA_ANY(value),
279+
valsize,
280+
cmp.zstd_level);
281+
282+
if (ZSTD_isError(cmp_size))
283+
{
284+
pfree(compressed);
285+
ZSTD_CHECK_ERROR(cmp_size, "ZSTD compression failed");
286+
}
287+
288+
/*
289+
* If compression did not reduce size, return NULL so that the
290+
* uncompressed data is stored
291+
*/
292+
if (cmp_size > valsize)
293+
{
294+
pfree(compressed);
295+
return NULL;
296+
}
297+
298+
/* Set the compressed size in the varlena header */
299+
SET_VARSIZE_COMPRESSED(compressed, cmp_size + VARHDRSZ_4BCE);
300+
}
301+
else
302+
elog(ERROR, "ZSTD metadata(dictionary) based compression not supported yet");
303+
304+
return compressed;
305+
306+
#else
307+
COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
308+
return NULL;
309+
#endif
310+
}
311+
312+
/*
 * Decompress a varlena that was compressed using ZSTD.
 *
 * Only dictionary-less data is handled: if the datum's header indicates
 * attached metadata an ERROR is raised.
 *
 * Returns a freshly palloc'd uncompressed varlena.  An ERROR is raised if
 * the ZSTD library reports a failure, or if the number of bytes produced
 * does not match the raw size recorded in the compressed header (which means
 * the stored data is corrupt).
 */
struct varlena *
zstd_decompress_datum(const struct varlena *value)
{
#ifdef USE_ZSTD
	uint32		rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(value);
	uint32		complen;
	struct varlena *result;
	size_t		outlen;

	/* dictionary (metadata) based decompression is not available yet */
	if (VARATT_4BCE_PTR_HAS_META(value))
		elog(ERROR, "ZSTD metadata(dictionary) based decompression not supported yet");

	/* length of the compressed payload, excluding the extended header */
	complen = VARSIZE_ANY(value) - VARHDRSZ_4BCE;

	/* allocate space for the uncompressed data plus a regular header */
	result = (struct varlena *) palloc(rawsize + VARHDRSZ);

	outlen = ZSTD_decompress(VARDATA(result), rawsize,
							 VARDATA_4BCE(value), complen);

	if (ZSTD_isError(outlen))
	{
		pfree(result);
		ZSTD_CHECK_ERROR(outlen, "ZSTD decompression failed");
	}

	/*
	 * The header told us how many bytes to expect; anything else means the
	 * stored datum is damaged, so refuse to hand back a truncated value.
	 */
	if (outlen != (size_t) rawsize)
	{
		pfree(result);
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg_internal("compressed zstd data is corrupt")));
	}

	/* set the final size in the varlena header */
	SET_VARSIZE(result, rawsize + VARHDRSZ);

	return result;
#else
	COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
	return NULL;				/* keep compiler quiet */
#endif
}
355+
356+
/* Decompress a slice of the datum */
357+
struct varlena *
358+
zstd_decompress_datum_slice(const struct varlena *value, int32 slicelength)
359+
{
360+
#ifdef USE_ZSTD
361+
/* ZSTD no dictionary compression */
362+
363+
struct varlena *result;
364+
ZSTD_inBuffer inBuf;
365+
ZSTD_outBuffer outBuf;
366+
size_t ret;
367+
ZSTD_DCtx *ZstdDecompressionCtx;
368+
bool meta = VARATT_4BCE_PTR_HAS_META(value);
369+
370+
if (!meta) /* ZSTD no dictionary */
371+
{
372+
ZstdDecompressionCtx = ZSTD_createDCtx();
373+
inBuf.src = VARDATA_4BCE(value);
374+
inBuf.size = VARSIZE_ANY(value) - VARHDRSZ_4BCE;
375+
inBuf.pos = 0;
376+
377+
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
378+
outBuf.dst = (char *) result + VARHDRSZ;
379+
outBuf.size = slicelength;
380+
outBuf.pos = 0;
381+
382+
/* Common decompression loop */
383+
while (inBuf.pos < inBuf.size && outBuf.pos < outBuf.size)
384+
{
385+
ret = ZSTD_decompressStream(ZstdDecompressionCtx, &outBuf, &inBuf);
386+
if (ZSTD_isError(ret))
387+
{
388+
pfree(result);
389+
ZSTD_freeDCtx(ZstdDecompressionCtx);
390+
ZSTD_CHECK_ERROR(ret, "zstd decompression failed");
391+
}
392+
}
393+
394+
Assert(outBuf.size == slicelength && outBuf.pos == slicelength);
395+
SET_VARSIZE(result, outBuf.pos + VARHDRSZ);
396+
ZSTD_freeDCtx(ZstdDecompressionCtx);
397+
}
398+
else
399+
elog(ERROR, "ZSTD metadata(dictionary) based decompression not supported yet");
400+
401+
return result;
402+
#else
403+
COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
404+
return NULL;
405+
#endif
406+
}
407+
249408
/*
250409
* Extract compression ID from a varlena.
251410
*
@@ -290,10 +449,17 @@ CompressionNameToMethod(const char *compression)
290449
else if (strcmp(compression, "lz4") == 0)
291450
{
292451
#ifndef USE_LZ4
293-
NO_LZ4_SUPPORT();
452+
COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
294453
#endif
295454
return TOAST_LZ4_COMPRESSION;
296455
}
456+
else if (strcmp(compression, "zstd") == 0)
457+
{
458+
#ifndef USE_ZSTD
459+
COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
460+
#endif
461+
return TOAST_ZSTD_COMPRESSION;
462+
}
297463

298464
return InvalidCompressionMethod;
299465
}
@@ -310,6 +476,8 @@ GetCompressionMethodName(char method)
310476
return "pglz";
311477
case TOAST_LZ4_COMPRESSION:
312478
return "lz4";
479+
case TOAST_ZSTD_COMPRESSION:
480+
return "zstd";
313481
default:
314482
elog(ERROR, "invalid compression method %c", method);
315483
return NULL; /* keep compiler quiet */
@@ -324,6 +492,7 @@ setup_cmp_info(char cmethod, Form_pg_attribute att)
324492
/* initialize from the attribute’s default settings */
325493
info.cmethod = cmethod;
326494
info.meta = false;
495+
info.zstd_level = DEFAULT_ZSTD_LEVEL;
327496

328497
/* If the compression method is not valid, use the current default */
329498
if (!CompressionMethodIsValid(cmethod))
@@ -334,6 +503,14 @@ setup_cmp_info(char cmethod, Form_pg_attribute att)
334503
case TOAST_PGLZ_COMPRESSION:
335504
case TOAST_LZ4_COMPRESSION:
336505
break;
506+
case TOAST_ZSTD_COMPRESSION:
507+
{
508+
AttributeOpts *aopt = get_attribute_options(att->attrelid, att->attnum);
509+
510+
if (aopt != NULL)
511+
info.zstd_level = aopt->zstd_level;
512+
}
513+
break;
337514
default:
338515
elog(ERROR, "invalid compression method %c", info.cmethod);
339516
}

src/backend/access/common/toast_internals.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ toast_compress_datum(Datum value, CompressionInfo cmp)
6767
tmp = lz4_compress_datum((const struct varlena *) value);
6868
cmid = TOAST_LZ4_COMPRESSION_ID;
6969
break;
70+
case TOAST_ZSTD_COMPRESSION:
71+
tmp = zstd_compress_datum((const struct varlena *) value, cmp);
72+
cmid = TOAST_ZSTD_COMPRESSION_ID;
73+
break;
7074
default:
7175
elog(ERROR, "invalid compression method %c", cmp.cmethod);
7276
}

src/backend/utils/adt/varlena.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5301,6 +5301,9 @@ pg_column_compression(PG_FUNCTION_ARGS)
53015301
case TOAST_LZ4_COMPRESSION_ID:
53025302
result = "lz4";
53035303
break;
5304+
case TOAST_ZSTD_COMPRESSION_ID:
5305+
result = "zstd";
5306+
break;
53045307
default:
53055308
elog(ERROR, "invalid compression method id %d", cmid);
53065309
}

src/backend/utils/misc/guc_tables.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,9 @@ static const struct config_enum_entry default_toast_compression_options[] = {
460460
{"pglz", TOAST_PGLZ_COMPRESSION, false},
461461
#ifdef USE_LZ4
462462
{"lz4", TOAST_LZ4_COMPRESSION, false},
463+
#endif
464+
#ifdef USE_ZSTD
465+
{"zstd", TOAST_ZSTD_COMPRESSION, false},
463466
#endif
464467
{NULL, 0, false}
465468
};

src/backend/utils/misc/postgresql.conf.sample

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate
752752
#row_security = on
753753
#default_table_access_method = 'heap'
754754
#default_tablespace = '' # a tablespace name, '' uses the default
755-
#default_toast_compression = 'pglz' # 'pglz' or 'lz4'
755+
#default_toast_compression = 'pglz' # 'pglz', 'lz4', or 'zstd'
756756
#temp_tablespaces = '' # a list of tablespace names, '' uses
757757
# only default tablespace
758758
#check_function_bodies = on

src/bin/pg_dump/pg_dump.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17570,6 +17570,9 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
1757017570
case 'l':
1757117571
cmname = "lz4";
1757217572
break;
17573+
case 'z':
17574+
cmname = "zstd";
17575+
break;
1757317576
default:
1757417577
cmname = NULL;
1757517578
break;

0 commit comments

Comments
 (0)