diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index aa9cccd1da4f..b50f3b43951a 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1786,12 +1786,13 @@ check_tuple_attribute(HeapCheckContext *ctx) bool valid = false; /* Compressed attributes should have a valid compression method */ - cmid = TOAST_COMPRESS_METHOD(&toast_pointer); + cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer); switch (cmid) { /* List of all valid compression method IDs */ case TOAST_PGLZ_COMPRESSION_ID: case TOAST_LZ4_COMPRESSION_ID: + case TOAST_ZSTD_COMPRESSION_ID: valid = true; break; diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c index 861f397e6db5..eb19739da03f 100644 --- a/src/backend/access/brin/brin_tuple.c +++ b/src/backend/access/brin/brin_tuple.c @@ -223,6 +223,7 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, { Datum cvalue; char compression; + CompressionInfo cmp; Form_pg_attribute att = TupleDescAttr(brdesc->bd_tupdesc, keyno); @@ -237,7 +238,8 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, else compression = InvalidCompressionMethod; - cvalue = toast_compress_datum(value, compression); + cmp = setup_cmp_info(compression, att); + cvalue = toast_compress_datum(value, cmp); if (DatumGetPointer(cvalue) != NULL) { diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 626517877422..6a2e6c9683d2 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -246,10 +246,10 @@ detoast_attr_slice(struct varlena *attr, * Determine maximum amount of compressed data needed for a prefix * of a given length (after decompression). 
* - * At least for now, if it's LZ4 data, we'll have to fetch the - * whole thing, because there doesn't seem to be an API call to - * determine how much compressed data we need to be sure of being - * able to decompress the required slice. + * At least for now, if it's LZ4 or Zstandard data, we'll have to + * fetch the whole thing, because there doesn't seem to be an API + * call to determine how much compressed data we need to be sure + * of being able to decompress the required slice. */ if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == TOAST_PGLZ_COMPRESSION_ID) @@ -478,13 +478,15 @@ toast_decompress_datum(struct varlena *attr) * Fetch the compression method id stored in the compression header and * decompress the data using the appropriate decompression routine. */ - cmid = TOAST_COMPRESS_METHOD(attr); + cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); switch (cmid) { case TOAST_PGLZ_COMPRESSION_ID: return pglz_decompress_datum(attr); case TOAST_LZ4_COMPRESSION_ID: return lz4_decompress_datum(attr); + case TOAST_ZSTD_COMPRESSION_ID: + return zstd_decompress_datum(attr); default: elog(ERROR, "invalid compression method id %d", cmid); return NULL; /* keep compiler quiet */ @@ -514,20 +516,22 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) * have been seen to give wrong results if passed an output size that is * more than the data's true decompressed size. */ - if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr)) + if ((uint32) slicelength >= VARDATA_COMPRESSED_GET_EXTSIZE(attr)) return toast_decompress_datum(attr); /* * Fetch the compression method id stored in the compression header and * decompress the data slice using the appropriate decompression routine. 
*/ - cmid = TOAST_COMPRESS_METHOD(attr); + cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); switch (cmid) { case TOAST_PGLZ_COMPRESSION_ID: return pglz_decompress_datum_slice(attr, slicelength); case TOAST_LZ4_COMPRESSION_ID: return lz4_decompress_datum_slice(attr, slicelength); + case TOAST_ZSTD_COMPRESSION_ID: + return zstd_decompress_datum_slice(attr, slicelength); default: elog(ERROR, "invalid compression method id %d", cmid); return NULL; /* keep compiler quiet */ diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index 1986b943a28b..1fe0e4288cc0 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -123,9 +123,10 @@ index_form_tuple_context(TupleDesc tupleDescriptor, att->attstorage == TYPSTORAGE_MAIN)) { Datum cvalue; + CompressionInfo cmp; - cvalue = toast_compress_datum(untoasted_values[i], - att->attcompression); + cmp = setup_cmp_info(att->attcompression, att); + cvalue = toast_compress_datum(untoasted_values[i], cmp); if (DatumGetPointer(cvalue) != NULL) { diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 46c1dce222d1..1267668a2424 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -24,6 +24,7 @@ #include "access/nbtree.h" #include "access/reloptions.h" #include "access/spgist_private.h" +#include "access/toast_compression.h" #include "catalog/pg_type.h" #include "commands/defrem.h" #include "commands/tablespace.h" @@ -381,7 +382,15 @@ static relopt_int intRelOpts[] = }, -1, 0, 1024 }, - + { + { + "zstd_level", + "Set column's ZSTD compression level", + RELOPT_KIND_ATTRIBUTE, + ShareUpdateExclusiveLock + }, + DEFAULT_ZSTD_LEVEL, MIN_ZSTD_LEVEL, MAX_ZSTD_LEVEL + }, /* list terminator */ {{NULL}} }; @@ -2097,7 +2106,8 @@ attribute_reloptions(Datum reloptions, bool validate) { static const relopt_parse_elt tab[] = { {"n_distinct", RELOPT_TYPE_REAL, 
offsetof(AttributeOpts, n_distinct)}, - {"n_distinct_inherited", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct_inherited)} + {"n_distinct_inherited", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct_inherited)}, + {"zstd_level", RELOPT_TYPE_INT, offsetof(AttributeOpts, zstd_level)}, }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index 21f2f4af97e3..5e7322ce7b9e 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -17,19 +17,32 @@ #include #endif +#ifdef USE_ZSTD +#include +#endif + #include "access/detoast.h" #include "access/toast_compression.h" #include "common/pg_lzcompress.h" #include "varatt.h" +#include "utils/attoptcache.h" /* GUC */ int default_toast_compression = TOAST_PGLZ_COMPRESSION; -#define NO_LZ4_SUPPORT() \ +#ifdef USE_ZSTD +#define ZSTD_CHECK_ERROR(zstd_ret, msg) \ + do { \ + if (ZSTD_isError(zstd_ret)) \ + ereport(ERROR, (errmsg("%s: %s", (msg), ZSTD_getErrorName(zstd_ret)))); \ + } while (0) +#endif + +#define COMPRESSION_METHOD_NOT_SUPPORTED(method) \ ereport(ERROR, \ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ - errmsg("compression method lz4 not supported"), \ - errdetail("This functionality requires the server to be built with lz4 support."))) + errmsg("compression method %s not supported", method), \ + errdetail("This functionality requires the server to be built with %s support.", method))) /* * Compress a varlena using PGLZ. 
@@ -139,7 +152,7 @@ struct varlena * lz4_compress_datum(const struct varlena *value) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + COMPRESSION_METHOD_NOT_SUPPORTED("lz4"); return NULL; /* keep compiler quiet */ #else int32 valsize; @@ -182,7 +195,7 @@ struct varlena * lz4_decompress_datum(const struct varlena *value) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + COMPRESSION_METHOD_NOT_SUPPORTED("lz4"); return NULL; /* keep compiler quiet */ #else int32 rawsize; @@ -215,7 +228,7 @@ struct varlena * lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + COMPRESSION_METHOD_NOT_SUPPORTED("lz4"); return NULL; /* keep compiler quiet */ #else int32 rawsize; @@ -245,6 +258,163 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) #endif } +/* Compress datum using ZSTD */ +struct varlena * +zstd_compress_datum(const struct varlena *value, CompressionInfo cmp) +{ +#ifdef USE_ZSTD + uint32 valsize = VARSIZE_ANY_EXHDR(value); + size_t max_size = ZSTD_compressBound(valsize); + struct varlena *compressed; + size_t cmp_size; + + if (!cmp.meta) /* ZSTD no dictionary */ + { + /* Allocate space for the compressed varlena (header + data) */ + compressed = (struct varlena *) palloc(max_size + VARHDRSZ_4BCE); + + cmp_size = ZSTD_compress(VARDATA_4BCE(compressed), + max_size, + VARDATA_ANY(value), + valsize, + cmp.zstd_level); + + if (ZSTD_isError(cmp_size)) + { + pfree(compressed); + ZSTD_CHECK_ERROR(cmp_size, "ZSTD compression failed"); + } + + /* + * If compression did not reduce size, return NULL so that the + * uncompressed data is stored + */ + if (cmp_size > valsize) + { + pfree(compressed); + return NULL; + } + + /* Set the compressed size in the varlena header */ + SET_VARSIZE_COMPRESSED(compressed, cmp_size + VARHDRSZ_4BCE); + } + else + elog(ERROR, "ZSTD metadata(dictionary) based compression not supported yet"); + + return compressed; + +#else + COMPRESSION_METHOD_NOT_SUPPORTED("zstd"); + return NULL; 
+#endif +} + +/* Decompression routine */ +struct varlena * +zstd_decompress_datum(const struct varlena *value) +{ +#ifdef USE_ZSTD + /* ZSTD no dictionary compression */ + uint32 actual_size_exhdr = VARDATA_COMPRESSED_GET_EXTSIZE(value); + uint32 zstd_compressed_len; + struct varlena *result; + size_t uncmp_size; + bool meta = VARATT_4BCE_PTR_HAS_META(value); + + if (!meta) /* ZSTD no dictionary */ + { + zstd_compressed_len = VARSIZE_ANY(value) - VARHDRSZ_4BCE; + + /* Allocate space for the uncompressed data */ + result = (struct varlena *) palloc(actual_size_exhdr + VARHDRSZ); + + uncmp_size = ZSTD_decompress(VARDATA(result), + actual_size_exhdr, + VARDATA_4BCE(value), + zstd_compressed_len); + + if (ZSTD_isError(uncmp_size)) + { + pfree(result); + ZSTD_CHECK_ERROR(uncmp_size, "ZSTD decompression failed"); + } + + /* Set final size in the varlena header */ + SET_VARSIZE(result, uncmp_size + VARHDRSZ); + } + else + elog(ERROR, "ZSTD metadata(dictionary) based decompression not supported yet"); + + return result; + +#else + COMPRESSION_METHOD_NOT_SUPPORTED("zstd"); + return NULL; +#endif +} + +/* + * Decompress the first slicelength bytes of a ZSTD-compressed varlena. + * + * The frame is streamed through ZSTD_decompressStream() until the output + * buffer is full; datums flagged as carrying metadata are rejected. 
+ */ +struct varlena * +zstd_decompress_datum_slice(const struct varlena *value, int32 slicelength) +{ +#ifdef USE_ZSTD + /* ZSTD no dictionary compression */ + + struct varlena *result; + ZSTD_inBuffer inBuf; + ZSTD_outBuffer outBuf; + size_t ret; + ZSTD_DCtx *ZstdDecompressionCtx; + bool meta = VARATT_4BCE_PTR_HAS_META(value); + + if (!meta) /* ZSTD no dictionary */ + { + /* palloc first: it can ERROR out, which would leak a malloc'd DCtx */ + result = (struct varlena *) palloc(slicelength + VARHDRSZ); + + ZstdDecompressionCtx = ZSTD_createDCtx(); + if (ZstdDecompressionCtx == NULL) + elog(ERROR, "could not create ZSTD decompression context"); + + inBuf.src = VARDATA_4BCE(value); + inBuf.size = VARSIZE_ANY(value) - VARHDRSZ_4BCE; + inBuf.pos = 0; + + outBuf.dst = (char *) result + VARHDRSZ; + outBuf.size = slicelength; + outBuf.pos = 0; + + /* Common decompression loop */ + while (inBuf.pos < inBuf.size && outBuf.pos < outBuf.size) + { + ret = ZSTD_decompressStream(ZstdDecompressionCtx, &outBuf, &inBuf); + if 
(ZSTD_isError(ret)) + { + pfree(result); + ZSTD_freeDCtx(ZstdDecompressionCtx); + ZSTD_CHECK_ERROR(ret, "zstd decompression failed"); + } + } + + Assert(outBuf.size == slicelength && outBuf.pos == slicelength); + SET_VARSIZE(result, outBuf.pos + VARHDRSZ); + ZSTD_freeDCtx(ZstdDecompressionCtx); + } + else + elog(ERROR, "ZSTD metadata(dictionary) based decompression not supported yet"); + + return result; +#else + COMPRESSION_METHOD_NOT_SUPPORTED("zstd"); + return NULL; +#endif +} + /* * Extract compression ID from a varlena. * @@ -289,10 +459,17 @@ CompressionNameToMethod(const char *compression) else if (strcmp(compression, "lz4") == 0) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + COMPRESSION_METHOD_NOT_SUPPORTED("lz4"); #endif return TOAST_LZ4_COMPRESSION; } + else if (strcmp(compression, "zstd") == 0) + { +#ifndef USE_ZSTD + COMPRESSION_METHOD_NOT_SUPPORTED("zstd"); +#endif + return TOAST_ZSTD_COMPRESSION; + } return InvalidCompressionMethod; } @@ -309,8 +486,44 @@ GetCompressionMethodName(char method) return "pglz"; case TOAST_LZ4_COMPRESSION: return "lz4"; + case TOAST_ZSTD_COMPRESSION: + return "zstd"; default: elog(ERROR, "invalid compression method %c", method); return NULL; /* keep compiler quiet */ } } + +CompressionInfo +setup_cmp_info(char cmethod, Form_pg_attribute att) +{ + CompressionInfo info; + + /* initialize from the attribute's default settings */ + info.cmethod = cmethod; + info.meta = false; + info.zstd_level = DEFAULT_ZSTD_LEVEL; + + /* If the compression method is not valid, use the current default */ + if (!CompressionMethodIsValid(cmethod)) + info.cmethod = default_toast_compression; + + switch (info.cmethod) + { + case TOAST_PGLZ_COMPRESSION: + case TOAST_LZ4_COMPRESSION: + break; + case TOAST_ZSTD_COMPRESSION: + { + AttributeOpts *aopt = get_attribute_options(att->attrelid, att->attnum); + + if (aopt != NULL) + info.zstd_level = aopt->zstd_level; + } + break; + default: + elog(ERROR, "invalid compression method %c", info.cmethod); + } + + 
return info; +} diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 7d8be8346ce5..b779f61da0ab 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -43,7 +43,7 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid); * ---------- */ Datum -toast_compress_datum(Datum value, char cmethod) +toast_compress_datum(Datum value, CompressionInfo cmp) { struct varlena *tmp = NULL; int32 valsize; @@ -54,14 +54,10 @@ toast_compress_datum(Datum value, char cmethod) valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); - /* If the compression method is not valid, use the current default */ - if (!CompressionMethodIsValid(cmethod)) - cmethod = default_toast_compression; - /* * Call appropriate compression routine for the compression method. */ - switch (cmethod) + switch (cmp.cmethod) { case TOAST_PGLZ_COMPRESSION: tmp = pglz_compress_datum((const struct varlena *) value); @@ -71,8 +67,12 @@ toast_compress_datum(Datum value, char cmethod) tmp = lz4_compress_datum((const struct varlena *) value); cmid = TOAST_LZ4_COMPRESSION_ID; break; + case TOAST_ZSTD_COMPRESSION: + tmp = zstd_compress_datum((const struct varlena *) value, cmp); + cmid = TOAST_ZSTD_COMPRESSION_ID; + break; default: - elog(ERROR, "invalid compression method %c", cmethod); + elog(ERROR, "invalid compression method %c", cmp.cmethod); } if (tmp == NULL) @@ -90,9 +90,11 @@ toast_compress_datum(Datum value, char cmethod) */ if (VARSIZE(tmp) < valsize - 2) { + bool meta = cmp.meta; + /* successful compression */ Assert(cmid != TOAST_INVALID_COMPRESSION_ID); - TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid); + TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid, meta); return PointerGetDatum(tmp); } else @@ -143,6 +145,7 @@ toast_save_datum(Relation rel, Datum value, Pointer dval = DatumGetPointer(value); int num_indexes; int validIndex; + ToastCompressionId cm = 
TOAST_INVALID_COMPRESSION_ID; Assert(!VARATT_IS_EXTERNAL(value)); @@ -179,14 +182,18 @@ toast_save_datum(Relation rel, Datum value, } else if (VARATT_IS_COMPRESSED(dval)) { + bool meta; + data_p = VARDATA(dval); data_todo = VARSIZE(dval) - VARHDRSZ; /* rawsize in a compressed datum is just the size of the payload */ toast_pointer.va_rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(dval) + VARHDRSZ; - + cm = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval); + meta = TOAST_CMPID_EXTENDED(cm) ? + VARATT_4BCE_HAS_META(((varattrib_4b *) (dval))->va_compressed_ext.va_ecinfo) : + false; /* set external size and compression method */ - VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo, - VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval)); + VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo, cm, meta); /* Assert that the numbers look like it's compressed */ Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); } @@ -368,9 +375,9 @@ toast_save_datum(Relation rel, Datum value, /* * Create the TOAST pointer value that we'll return */ - result = (struct varlena *) palloc(TOAST_POINTER_SIZE); + result = (struct varlena *) palloc(TOAST_CMPID_EXTENDED(cm) ? TOAST_POINTER_EXT_SIZE : TOAST_POINTER_NOEXT_SIZE); SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK); - memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer)); + memcpy(VARDATA_EXTERNAL(result), &toast_pointer, TOAST_CMPID_EXTENDED(cm) ? TOAST_POINTER_EXT_SIZE - VARHDRSZ_EXTERNAL : TOAST_POINTER_NOEXT_SIZE - VARHDRSZ_EXTERNAL); return PointerGetDatum(result); } diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c index b60fab0a4d29..1edd07634dbb 100644 --- a/src/backend/access/table/toast_helper.c +++ b/src/backend/access/table/toast_helper.c @@ -171,7 +171,7 @@ toast_tuple_init(ToastTupleContext *ttc) * The column must have attstorage EXTERNAL or EXTENDED if check_main is * false, and must have attstorage MAIN if check_main is true. 
* - * The column must have a minimum size of MAXALIGN(TOAST_POINTER_SIZE); + * The column must have a minimum size of MAXALIGN(TOAST_POINTER_NOEXT_SIZE); * if not, no benefit is to be expected by compressing it. * * The return value is the index of the biggest suitable column, or @@ -184,7 +184,7 @@ toast_tuple_find_biggest_attribute(ToastTupleContext *ttc, TupleDesc tupleDesc = ttc->ttc_rel->rd_att; int numAttrs = tupleDesc->natts; int biggest_attno = -1; - int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE); + int32 biggest_size = MAXALIGN(TOAST_POINTER_NOEXT_SIZE); int32 skip_colflags = TOASTCOL_IGNORE; int i; @@ -229,8 +229,10 @@ toast_tuple_try_compression(ToastTupleContext *ttc, int attribute) Datum *value = &ttc->ttc_values[attribute]; Datum new_value; ToastAttrInfo *attr = &ttc->ttc_attr[attribute]; + Form_pg_attribute att = TupleDescAttr(ttc->ttc_rel->rd_att, attribute); + CompressionInfo cmp = setup_cmp_info(attr->tai_compression, att); - new_value = toast_compress_datum(*value, attr->tai_compression); + new_value = toast_compress_datum(*value, cmp); if (DatumGetPointer(new_value) != NULL) { diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 3e4d5568bde8..063780e56dc1 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -5301,6 +5301,9 @@ pg_column_compression(PG_FUNCTION_ARGS) case TOAST_LZ4_COMPRESSION_ID: result = "lz4"; break; + case TOAST_ZSTD_COMPRESSION_ID: + result = "zstd"; + break; default: elog(ERROR, "invalid compression method id %d", cmid); } diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 2f8cbd867599..9c167653f430 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -460,6 +460,9 @@ static const struct config_enum_entry default_toast_compression_options[] = { {"pglz", TOAST_PGLZ_COMPRESSION, false}, #ifdef USE_LZ4 {"lz4", TOAST_LZ4_COMPRESSION, false}, +#endif +#ifdef USE_ZSTD + {"zstd", 
TOAST_ZSTD_COMPRESSION, false}, #endif {NULL, 0, false} }; diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 87ce76b18f41..93d3c97179a9 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -752,7 +752,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate #row_security = on #default_table_access_method = 'heap' #default_tablespace = '' # a tablespace name, '' uses the default -#default_toast_compression = 'pglz' # 'pglz' or 'lz4' +#default_toast_compression = 'pglz' # 'pglz' or 'lz4' or 'zstd' #temp_tablespaces = '' # a list of tablespace names, '' uses # only default tablespace #check_function_bodies = on diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 37432e66efd7..6083ae1a6ad2 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -17570,6 +17570,9 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) case 'l': cmname = "lz4"; break; + case 'z': + cmname = "zstd"; + break; default: cmname = NULL; break; diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 1d08268393e3..26951f8f8904 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2171,8 +2171,9 @@ describeOneTableDetails(const char *schemaname, /* these strings are literal in our syntax, so not translated. */ printTableAddCell(&cont, (compression[0] == 'p' ? "pglz" : (compression[0] == 'l' ? "lz4" : - (compression[0] == '\0' ? "" : - "???"))), + (compression[0] == 'z' ? "zstd" : + (compression[0] == '\0' ? 
"" : + "???")))), false, false); } diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index ec65ab79fecb..64f26156aa27 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -2881,11 +2881,11 @@ match_previous_words(int pattern_id, /* ALTER TABLE ALTER [COLUMN] SET ( */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "(") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "(")) - COMPLETE_WITH("n_distinct", "n_distinct_inherited"); + COMPLETE_WITH("n_distinct", "n_distinct_inherited", "zstd_level"); /* ALTER TABLE ALTER [COLUMN] SET COMPRESSION */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "COMPRESSION") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "COMPRESSION")) - COMPLETE_WITH("DEFAULT", "PGLZ", "LZ4"); + COMPLETE_WITH("DEFAULT", "PGLZ", "LZ4", "ZSTD"); /* ALTER TABLE ALTER [COLUMN] SET EXPRESSION */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "EXPRESSION") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "EXPRESSION")) diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h index e603a2276c38..8dbbe4d81923 100644 --- a/src/include/access/detoast.h +++ b/src/include/access/detoast.h @@ -23,12 +23,13 @@ do { \ varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \ Assert(VARATT_IS_EXTERNAL(attre)); \ - Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \ - memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \ + memset(&(toast_pointer), 0, sizeof(toast_pointer)); \ + memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), VARSIZE_EXTERNAL(attre) - VARHDRSZ_EXTERNAL); \ } while (0) /* Size of an EXTERNAL datum that contains a standard TOAST pointer */ -#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_external)) +#define TOAST_POINTER_NOEXT_SIZE (VARHDRSZ_EXTERNAL + 
offsetof(varatt_external, extended)) +#define TOAST_POINTER_EXT_SIZE (TOAST_POINTER_NOEXT_SIZE + MEMBER_SIZE(varatt_external, extended.cmp)) /* Size of an EXTERNAL datum that contains an indirection pointer */ #define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_indirect)) diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h index 13c4612ceedc..426f364bfabf 100644 --- a/src/include/access/toast_compression.h +++ b/src/include/access/toast_compression.h @@ -13,6 +13,12 @@ #ifndef TOAST_COMPRESSION_H #define TOAST_COMPRESSION_H +#include "catalog/pg_attribute.h" + +#ifdef USE_ZSTD +#include +#endif + /* * GUC support. * @@ -22,25 +28,21 @@ */ extern PGDLLIMPORT int default_toast_compression; -/* - * Built-in compression method ID. The toast compression header will store - * this in the first 2 bits of the raw length. These built-in compression - * method IDs are directly mapped to the built-in compression methods. - * - * Don't use these values for anything other than understanding the meaning - * of the raw bits from a varlena; in particular, if the goal is to identify - * a compression method, use the constants TOAST_PGLZ_COMPRESSION, etc. - * below. We might someday support more than 4 compression methods, but - * we can never have more than 4 values in this enum, because there are - * only 2 bits available in the places where this is stored. - */ typedef enum ToastCompressionId { TOAST_PGLZ_COMPRESSION_ID = 0, TOAST_LZ4_COMPRESSION_ID = 1, - TOAST_INVALID_COMPRESSION_ID = 2, + TOAST_ZSTD_COMPRESSION_ID = 2, + TOAST_INVALID_COMPRESSION_ID = 3, } ToastCompressionId; +typedef struct CompressionInfo +{ + char cmethod; + bool meta; + int zstd_level; +} CompressionInfo; + /* * Built-in compression methods. pg_attribute will store these in the * attcompression column. 
In attcompression, InvalidCompressionMethod @@ -48,10 +50,22 @@ typedef enum ToastCompressionId */ #define TOAST_PGLZ_COMPRESSION 'p' #define TOAST_LZ4_COMPRESSION 'l' +#define TOAST_ZSTD_COMPRESSION 'z' #define InvalidCompressionMethod '\0' #define CompressionMethodIsValid(cm) ((cm) != InvalidCompressionMethod) +/* True for compression IDs other than pglz/lz4/invalid, i.e. those stored via the extended (va_ecinfo) encoding */ +#define TOAST_CMPID_EXTENDED(cmpid) (!((cmpid) == TOAST_PGLZ_COMPRESSION_ID || (cmpid) == TOAST_LZ4_COMPRESSION_ID || (cmpid) == TOAST_INVALID_COMPRESSION_ID)) +#ifdef USE_ZSTD +#define DEFAULT_ZSTD_LEVEL ZSTD_CLEVEL_DEFAULT +#define MIN_ZSTD_LEVEL ((int) -ZSTD_BLOCKSIZE_MAX) +#define MAX_ZSTD_LEVEL 22 +#else +#define DEFAULT_ZSTD_LEVEL 0 +#define MIN_ZSTD_LEVEL 0 +#define MAX_ZSTD_LEVEL 0 +#endif /* pglz compression/decompression routines */ extern struct varlena *pglz_compress_datum(const struct varlena *value); @@ -65,9 +79,15 @@ extern struct varlena *lz4_decompress_datum(const struct varlena *value); extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength); +/* zstd nodict compression/decompression routines */ +extern struct varlena *zstd_compress_datum(const struct varlena *value, CompressionInfo cmp); +extern struct varlena *zstd_decompress_datum(const struct varlena *value); +extern struct varlena *zstd_decompress_datum_slice(const struct varlena *value, int32 slicelength); + /* other stuff */ extern ToastCompressionId toast_get_compression_id(struct varlena *attr); extern char CompressionNameToMethod(const char *compression); extern const char *GetCompressionMethodName(char method); +extern CompressionInfo setup_cmp_info(char cmethod, Form_pg_attribute att); #endif /* TOAST_COMPRESSION_H */ diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index 06ae8583c1e1..431ed4b038a6 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -17,35 +17,32 @@ #include "utils/relcache.h" #include "utils/snapshot.h" -/* - * The information at the start of 
the compressed toast data. - */ -typedef struct toast_compress_header -{ - int32 vl_len_; /* varlena header (do not touch directly!) */ - uint32 tcinfo; /* 2 bits for compression method and 30 bits - * external size; see va_extinfo */ -} toast_compress_header; - /* * Utilities for manipulation of header information for compressed * toast entries. */ -#define TOAST_COMPRESS_EXTSIZE(ptr) \ - (((toast_compress_header *) (ptr))->tcinfo & VARLENA_EXTSIZE_MASK) -#define TOAST_COMPRESS_METHOD(ptr) \ - (((toast_compress_header *) (ptr))->tcinfo >> VARLENA_EXTSIZE_BITS) - -#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \ - do { \ - Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ - Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ - ((toast_compress_header *) (ptr))->tcinfo = \ - (len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \ +#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method, meta) \ + do { \ + Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ + Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm_method) == TOAST_LZ4_COMPRESSION_ID || \ + (cm_method) == TOAST_ZSTD_COMPRESSION_ID); \ + if (!TOAST_CMPID_EXTENDED((cm_method))) \ + { \ + ((varattrib_4b *)(ptr))->va_compressed.va_tcinfo = \ + ((uint32)(len)) | ((uint32)(cm_method) << VARLENA_EXTSIZE_BITS); \ + } \ + else \ + { \ + /* extended path: EXT flag goes in tcinfo, method id and meta bit in ecinfo */ \ + ((varattrib_4b *)(ptr))->va_compressed_ext.va_tcinfo = \ + ((uint32)(len)) | ((uint32)(VARATT_4BCE_EXTFLAG) << VARLENA_EXTSIZE_BITS); \ + ((varattrib_4b *)(ptr))->va_compressed_ext.va_ecinfo = \ + VARATT_4BCE_ENCODE((meta), (cm_method)); \ + } \ } while (0) -extern Datum toast_compress_datum(Datum value, char cmethod); +extern Datum toast_compress_datum(Datum value, CompressionInfo cmp); extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock); extern void toast_delete_datum(Relation rel, Datum value, bool is_speculative); 
diff --git a/src/include/utils/attoptcache.h b/src/include/utils/attoptcache.h index f684a772af52..51d65ebd6461 100644 --- a/src/include/utils/attoptcache.h +++ b/src/include/utils/attoptcache.h @@ -21,6 +21,7 @@ typedef struct AttributeOpts int32 vl_len_; /* varlena header (do not touch directly!) */ float8 n_distinct; float8 n_distinct_inherited; + int zstd_level; } AttributeOpts; extern AttributeOpts *get_attribute_options(Oid attrelid, int attnum); diff --git a/src/include/varatt.h b/src/include/varatt.h index 2e8564d49980..eb63d28af82d 100644 --- a/src/include/varatt.h +++ b/src/include/varatt.h @@ -28,6 +28,9 @@ * you need to memcpy from the tuple into a local struct variable before * you can look at these fields! (The reason we use memcmp is to avoid * having to do that just to detect equality of two TOAST pointers...) + * + * When the top two bits of va_extinfo are both set (VARATT_4BCE_EXTFLAG), + * the pointer carries the optional trailer declared at the end of the struct. */ typedef struct varatt_external { @@ -36,6 +39,14 @@ typedef struct varatt_external * compression method */ Oid va_valueid; /* Unique ID of value within TOAST table */ Oid va_toastrelid; /* RelID of TOAST table containing it */ + /* -------- optional trailer -------- */ + union + { + struct /* compression-method trailer */ + { + uint8 va_ecinfo; /* meta flag (bit 0), method id - 2 (bits 1..7); see VARATT_4BCE_ENCODE */ + } cmp; + } extended; /* "extended" = optional bytes */ } varatt_external; /* @@ -93,11 +104,24 @@ typedef enum vartag_external #define VARTAG_IS_EXPANDED(tag) \ (((tag) & ~1) == VARTAG_EXPANDED_RO) -#define VARTAG_SIZE(tag) \ - ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ - VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \ - (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \ - (AssertMacro(false), 0)) +#define MEMBER_SIZE(type, member) sizeof( ((type *)0)->member ) + +#define VARTAG_SIZE(PTR) \ +( \ + VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT ? 
\ + sizeof(varatt_indirect) : \ + VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)) ? \ + sizeof(varatt_expanded) : \ + VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK ? \ + (offsetof(varatt_external, extended) + \ + ((UNALIGNED_U32((const uint8 *)(PTR) + VARHDRSZ_EXTERNAL + \ + offsetof(varatt_external, va_extinfo)) \ + >> VARLENA_EXTSIZE_BITS) == VARATT_4BCE_EXTFLAG \ + ? MEMBER_SIZE(varatt_external, extended.cmp) \ + : 0)) \ + : \ + (AssertMacro(false), 0) \ +) /* * These structs describe the header of a varlena object that may have been @@ -122,6 +146,14 @@ typedef union * compression method; see va_extinfo */ char va_data[FLEXIBLE_ARRAY_MEMBER]; /* Compressed data */ } va_compressed; + struct + { + uint32 va_header; + uint32 va_tcinfo; /* Original data size (excludes header) and + * compression method; see va_extinfo */ + uint8 va_ecinfo; /* meta flag (bit 0), method id - 2 (bits 1..7) */ + char va_data[FLEXIBLE_ARRAY_MEMBER]; + } va_compressed_ext; } varattrib_4b; typedef struct @@ -206,6 +238,12 @@ typedef struct (((varattrib_1b_e *) (PTR))->va_header = 0x80, \ ((varattrib_1b_e *) (PTR))->va_tag = (tag)) +#define UNALIGNED_U32(ptr) \ + ( (uint32) (((const uint8 *)(ptr))[3]) \ + | ((uint32)(((const uint8 *)(ptr))[2]) << 8) \ + | ((uint32)(((const uint8 *)(ptr))[1]) << 16) \ + | ((uint32)(((const uint8 *)(ptr))[0]) << 24) ) + #else /* !WORDS_BIGENDIAN */ #define VARATT_IS_4B(PTR) \ @@ -239,6 +277,12 @@ typedef struct (((varattrib_1b_e *) (PTR))->va_header = 0x01, \ ((varattrib_1b_e *) (PTR))->va_tag = (tag)) +#define UNALIGNED_U32(ptr) \ + ( (uint32) (((const uint8 *)(ptr))[0]) \ + | ((uint32)(((const uint8 *)(ptr))[1]) << 8) \ + | ((uint32)(((const uint8 *)(ptr))[2]) << 16) \ + | ((uint32)(((const uint8 *)(ptr))[3]) << 24) ) + #endif /* WORDS_BIGENDIAN */ #define VARDATA_4B(PTR) (((varattrib_4b *) (PTR))->va_4byte.va_data) @@ -282,7 +326,7 @@ typedef struct #define VARDATA_SHORT(PTR) VARDATA_1B(PTR) #define VARTAG_EXTERNAL(PTR) VARTAG_1B_E(PTR) -#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + 
VARTAG_SIZE(VARTAG_EXTERNAL(PTR))) +#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(PTR)) #define VARDATA_EXTERNAL(PTR) VARDATA_1B_E(PTR) #define VARATT_IS_COMPRESSED(PTR) VARATT_IS_4B_C(PTR) @@ -328,20 +372,34 @@ typedef struct #define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \ (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK) #define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS) + ( (VARATT_IS_4BCE(PTR)) ? VARATT_4BCE_GET_CMID(((varattrib_4b *) (PTR))->va_compressed_ext.va_ecinfo) \ + : (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS)) /* Same for external Datums; but note argument is a struct varatt_external */ #define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \ ((toast_pointer).va_extinfo & VARLENA_EXTSIZE_MASK) -#define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) \ - ((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS) - -#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \ - do { \ - Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm) == TOAST_LZ4_COMPRESSION_ID); \ - ((toast_pointer).va_extinfo = \ - (len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \ +#define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) \ + ( ((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS) == VARATT_4BCE_EXTFLAG \ + ? 
VARATT_4BCE_GET_CMID((toast_pointer).extended.cmp.va_ecinfo) \ + : (toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS ) + +#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm, meta) \ + do { \ + Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm) == TOAST_LZ4_COMPRESSION_ID || \ + (cm) == TOAST_ZSTD_COMPRESSION_ID); \ + if (!TOAST_CMPID_EXTENDED(cm)) \ + { \ + /* method fits in the top two bits of va_extinfo */ \ + (toast_pointer).va_extinfo = (uint32)(len) | ((uint32)(cm) << VARLENA_EXTSIZE_BITS);\ + } \ + else \ + { \ + /* set "extended" flag (cast avoids signed-shift overflow) and store the extra byte */ \ + (toast_pointer).va_extinfo = (uint32)(len) | \ + ((uint32) VARATT_4BCE_EXTFLAG << VARLENA_EXTSIZE_BITS); \ + (toast_pointer).extended.cmp.va_ecinfo = VARATT_4BCE_ENCODE(meta, cm); \ + } \ } while (0) /* @@ -355,4 +413,29 @@ typedef struct (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \ (toast_pointer).va_rawsize - VARHDRSZ) +/* Upper-two-bit pattern 0b11 marks "extended compression info present". */ +#define VARATT_4BCE_EXTFLAG 0x3 + +/* Helper: pack into a single byte: flag (b0), cmid-2 (b1..7) */ +#define VARATT_4BCE_ENCODE(flag, cmid) \ + ((uint8)((((cmid) - 2) << 1) | ((flag) & 0x01))) + +#define VARATT_4BCE_HAS_META(raw) ((raw) & 0x01u) +#define VARATT_4BCE_GET_CMID(raw) ((((raw) >> 1) & 0x7Fu) + 2) + +/* Pointer-level helpers */ +#define VARATT_4BCE_PTR_HAS_META(ptr) \ + VARATT_4BCE_HAS_META(((varattrib_4b *)(ptr))->va_compressed_ext.va_ecinfo) + +/* Does this varattrib use the "compressed-extended" format? 
*/ +#define VARATT_IS_4BCE(ptr) \ + ((((varattrib_4b *)(ptr))->va_compressed_ext.va_tcinfo >> VARLENA_EXTSIZE_BITS) \ + == VARATT_4BCE_EXTFLAG) + +/* Access the start of the compressed payload */ +#define VARDATA_4BCE(ptr) \ + (((varattrib_4b *)(ptr))->va_compressed_ext.va_data) + +#define VARHDRSZ_4BCE (offsetof(varattrib_4b, va_compressed_ext.va_data)) + #endif diff --git a/src/test/regress/expected/compression.out b/src/test/regress/expected/compression.out index 4dd9ee7200d1..94495388adec 100644 --- a/src/test/regress/expected/compression.out +++ b/src/test/regress/expected/compression.out @@ -238,10 +238,11 @@ NOTICE: merging multiple inherited definitions of column "f1" -- test default_toast_compression GUC SET default_toast_compression = ''; ERROR: invalid value for parameter "default_toast_compression": "" -HINT: Available values: pglz, lz4. +HINT: Available values: pglz, lz4, zstd. SET default_toast_compression = 'I do not exist compression'; ERROR: invalid value for parameter "default_toast_compression": "I do not exist compression" -HINT: Available values: pglz, lz4. +HINT: Available values: pglz, lz4, zstd. +SET default_toast_compression = 'zstd'; SET default_toast_compression = 'lz4'; SET default_toast_compression = 'pglz'; -- test alter compression method diff --git a/src/test/regress/expected/compression_1.out b/src/test/regress/expected/compression_1.out index 7bd7642b4b94..0ce491521767 100644 --- a/src/test/regress/expected/compression_1.out +++ b/src/test/regress/expected/compression_1.out @@ -233,6 +233,9 @@ HINT: Available values: pglz. SET default_toast_compression = 'I do not exist compression'; ERROR: invalid value for parameter "default_toast_compression": "I do not exist compression" HINT: Available values: pglz. +SET default_toast_compression = 'zstd'; +ERROR: invalid value for parameter "default_toast_compression": "zstd" +HINT: Available values: pglz. 
SET default_toast_compression = 'lz4'; ERROR: invalid value for parameter "default_toast_compression": "lz4" HINT: Available values: pglz. diff --git a/src/test/regress/expected/compression_zstd.out b/src/test/regress/expected/compression_zstd.out new file mode 100644 index 000000000000..5a05a3e6d540 --- /dev/null +++ b/src/test/regress/expected/compression_zstd.out @@ -0,0 +1,198 @@ +\set HIDE_TOAST_COMPRESSION false +-- Ensure stable results regardless of the installation's default. +SET default_toast_compression = 'pglz'; +---------------------------------------------------------------- +-- 1. Create Test Table with Zstd Compression +---------------------------------------------------------------- +DROP TABLE IF EXISTS cmdata_zstd_nodict CASCADE; +NOTICE: table "cmdata_zstd_nodict" does not exist, skipping +CREATE TABLE cmdata_zstd_nodict ( + f1 TEXT COMPRESSION zstd +); +---------------------------------------------------------------- +-- 2. Insert Data Rows +---------------------------------------------------------------- +DO $$ +BEGIN + FOR i IN 1..15 LOOP + INSERT INTO cmdata_zstd_nodict (f1) VALUES (repeat('1234567890', 1004)); -- inline + INSERT INTO cmdata_zstd_nodict (f1) VALUES (repeat('1234567890', 2500000)); -- externally stored + END LOOP; +END $$; +---------------------------------------------------------------- +-- 3. 
Verify Table Structure and Compression Settings +---------------------------------------------------------------- +-- Table Structure for cmdata_zstd +\d+ cmdata_zstd_nodict; + Table "public.cmdata_zstd_nodict" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | zstd | | + +-- Compression Settings for f1 Column +SELECT pg_column_compression(f1) AS compression_method, + count(*) AS row_count +FROM cmdata_zstd_nodict +GROUP BY pg_column_compression(f1); + compression_method | row_count +--------------------+----------- + zstd | 30 +(1 row) + +---------------------------------------------------------------- +-- 4. Decompression Tests +---------------------------------------------------------------- +-- Decompression Slice Test (Extracting Substrings) +SELECT SUBSTR(f1, 200, 50) AS data_slice +FROM cmdata_zstd_nodict; + data_slice +---------------------------------------------------- + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 
01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 +(30 rows) + +---------------------------------------------------------------- +-- 5. Test Table Creation with LIKE INCLUDING COMPRESSION +---------------------------------------------------------------- +DROP TABLE IF EXISTS cmdata_zstd_nodict_2; +NOTICE: table "cmdata_zstd_nodict_2" does not exist, skipping +CREATE TABLE cmdata_zstd_nodict_2 (LIKE cmdata_zstd_nodict INCLUDING COMPRESSION); +-- Table Structure for cmdata_zstd_2 +\d+ cmdata_zstd_nodict_2; + Table "public.cmdata_zstd_nodict_2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | zstd | | + +DROP TABLE cmdata_zstd_nodict_2; +---------------------------------------------------------------- +-- 6. 
Materialized View Compression Test +---------------------------------------------------------------- +DROP MATERIALIZED VIEW IF EXISTS compressmv_zstd_nodict; +NOTICE: materialized view "compressmv_zstd_nodict" does not exist, skipping +CREATE MATERIALIZED VIEW compressmv_zstd_nodict AS + SELECT f1 FROM cmdata_zstd_nodict; +-- Materialized View Structure for compressmv_zstd +\d+ compressmv_zstd_nodict; + Materialized view "public.compressmv_zstd_nodict" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | | | +View definition: + SELECT f1 + FROM cmdata_zstd_nodict; + +-- Materialized View Compression Check +SELECT pg_column_compression(f1) AS mv_compression +FROM compressmv_zstd_nodict; + mv_compression +---------------- + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd + zstd +(30 rows) + +---------------------------------------------------------------- +-- 7. Additional Updates and Round-Trip Tests +---------------------------------------------------------------- +-- Update some rows to check if the dictionary remains effective after modifications. +UPDATE cmdata_zstd_nodict +SET f1 = f1 || ' UPDATED'; +-- Verification of Updated Rows +SELECT SUBSTR(f1, LENGTH(f1) - 7 + 1, 7) AS preview +FROM cmdata_zstd_nodict; + preview +--------- + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED + UPDATED +(30 rows) + +---------------------------------------------------------------- +-- 8. 
Clean Up +---------------------------------------------------------------- +DROP MATERIALIZED VIEW compressmv_zstd_nodict; +DROP TABLE cmdata_zstd_nodict; +\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/expected/compression_zstd_1.out b/src/test/regress/expected/compression_zstd_1.out new file mode 100644 index 000000000000..ae87a0b652f4 --- /dev/null +++ b/src/test/regress/expected/compression_zstd_1.out @@ -0,0 +1,104 @@ +\set HIDE_TOAST_COMPRESSION false +-- Ensure stable results regardless of the installation's default. +SET default_toast_compression = 'pglz'; +---------------------------------------------------------------- +-- 1. Create Test Table with Zstd Compression +---------------------------------------------------------------- +DROP TABLE IF EXISTS cmdata_zstd_nodict CASCADE; +NOTICE: table "cmdata_zstd_nodict" does not exist, skipping +CREATE TABLE cmdata_zstd_nodict ( + f1 TEXT COMPRESSION zstd +); +ERROR: compression method zstd not supported +DETAIL: This functionality requires the server to be built with zstd support. +---------------------------------------------------------------- +-- 2. Insert Data Rows +---------------------------------------------------------------- +DO $$ +BEGIN + FOR i IN 1..15 LOOP + INSERT INTO cmdata_zstd_nodict (f1) VALUES (repeat('1234567890', 1004)); -- inline + INSERT INTO cmdata_zstd_nodict (f1) VALUES (repeat('1234567890', 2500000)); -- externally stored + END LOOP; +END $$; +ERROR: relation "cmdata_zstd_nodict" does not exist +LINE 1: INSERT INTO cmdata_zstd_nodict (f1) VALUES (repeat('12345678... + ^ +QUERY: INSERT INTO cmdata_zstd_nodict (f1) VALUES (repeat('1234567890', 1004)) +CONTEXT: PL/pgSQL function inline_code_block line 4 at SQL statement +---------------------------------------------------------------- +-- 3. 
Verify Table Structure and Compression Settings +---------------------------------------------------------------- +-- Table Structure for cmdata_zstd +\d+ cmdata_zstd_nodict; +-- Compression Settings for f1 Column +SELECT pg_column_compression(f1) AS compression_method, + count(*) AS row_count +FROM cmdata_zstd_nodict +GROUP BY pg_column_compression(f1); +ERROR: relation "cmdata_zstd_nodict" does not exist +LINE 3: FROM cmdata_zstd_nodict + ^ +---------------------------------------------------------------- +-- 4. Decompression Tests +---------------------------------------------------------------- +-- Decompression Slice Test (Extracting Substrings) +SELECT SUBSTR(f1, 200, 50) AS data_slice +FROM cmdata_zstd_nodict; +ERROR: relation "cmdata_zstd_nodict" does not exist +LINE 2: FROM cmdata_zstd_nodict; + ^ +---------------------------------------------------------------- +-- 5. Test Table Creation with LIKE INCLUDING COMPRESSION +---------------------------------------------------------------- +DROP TABLE IF EXISTS cmdata_zstd_nodict_2; +NOTICE: table "cmdata_zstd_nodict_2" does not exist, skipping +CREATE TABLE cmdata_zstd_nodict_2 (LIKE cmdata_zstd_nodict INCLUDING COMPRESSION); +ERROR: relation "cmdata_zstd_nodict" does not exist +LINE 1: CREATE TABLE cmdata_zstd_nodict_2 (LIKE cmdata_zstd_nodict I... + ^ +-- Table Structure for cmdata_zstd_2 +\d+ cmdata_zstd_nodict_2; +DROP TABLE cmdata_zstd_nodict_2; +ERROR: table "cmdata_zstd_nodict_2" does not exist +---------------------------------------------------------------- +-- 6. 
Materialized View Compression Test +---------------------------------------------------------------- +DROP MATERIALIZED VIEW IF EXISTS compressmv_zstd_nodict; +NOTICE: materialized view "compressmv_zstd_nodict" does not exist, skipping +CREATE MATERIALIZED VIEW compressmv_zstd_nodict AS + SELECT f1 FROM cmdata_zstd_nodict; +ERROR: relation "cmdata_zstd_nodict" does not exist +LINE 2: SELECT f1 FROM cmdata_zstd_nodict; + ^ +-- Materialized View Structure for compressmv_zstd +\d+ compressmv_zstd_nodict; +-- Materialized View Compression Check +SELECT pg_column_compression(f1) AS mv_compression +FROM compressmv_zstd_nodict; +ERROR: relation "compressmv_zstd_nodict" does not exist +LINE 2: FROM compressmv_zstd_nodict; + ^ +---------------------------------------------------------------- +-- 7. Additional Updates and Round-Trip Tests +---------------------------------------------------------------- +-- Update some rows to check if the dictionary remains effective after modifications. +UPDATE cmdata_zstd_nodict +SET f1 = f1 || ' UPDATED'; +ERROR: relation "cmdata_zstd_nodict" does not exist +LINE 1: UPDATE cmdata_zstd_nodict + ^ +-- Verification of Updated Rows +SELECT SUBSTR(f1, LENGTH(f1) - 7 + 1, 7) AS preview +FROM cmdata_zstd_nodict; +ERROR: relation "cmdata_zstd_nodict" does not exist +LINE 2: FROM cmdata_zstd_nodict; + ^ +---------------------------------------------------------------- +-- 8. 
Clean Up +---------------------------------------------------------------- +DROP MATERIALIZED VIEW compressmv_zstd_nodict; +ERROR: materialized view "compressmv_zstd_nodict" does not exist +DROP TABLE cmdata_zstd_nodict; +ERROR: table "cmdata_zstd_nodict" does not exist +\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index a424be2a6bf0..7e1d227b976d 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -123,7 +123,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate numa +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_zstd memoize stats predicate numa # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/compression.sql b/src/test/regress/sql/compression.sql index 490595fcfb26..e29909558f98 100644 --- a/src/test/regress/sql/compression.sql +++ b/src/test/regress/sql/compression.sql @@ -102,6 +102,7 @@ CREATE TABLE cminh() INHERITS (cmdata, cmdata3); -- test default_toast_compression GUC SET default_toast_compression = ''; SET default_toast_compression = 'I do not exist compression'; +SET default_toast_compression = 'zstd'; SET default_toast_compression = 'lz4'; SET default_toast_compression = 'pglz'; diff --git a/src/test/regress/sql/compression_zstd.sql b/src/test/regress/sql/compression_zstd.sql new file mode 100644 index 000000000000..4200d9a78fc4 --- /dev/null +++ b/src/test/regress/sql/compression_zstd.sql @@ -0,0 +1,83 @@ +\set HIDE_TOAST_COMPRESSION false + +-- Ensure stable results regardless 
of the installation's default. +SET default_toast_compression = 'pglz'; + +---------------------------------------------------------------- +-- 1. Create Test Table with Zstd Compression +---------------------------------------------------------------- +DROP TABLE IF EXISTS cmdata_zstd_nodict CASCADE; +CREATE TABLE cmdata_zstd_nodict ( + f1 TEXT COMPRESSION zstd +); + +---------------------------------------------------------------- +-- 2. Insert Data Rows +---------------------------------------------------------------- +DO $$ +BEGIN + FOR i IN 1..15 LOOP + INSERT INTO cmdata_zstd_nodict (f1) VALUES (repeat('1234567890', 1004)); -- inline + INSERT INTO cmdata_zstd_nodict (f1) VALUES (repeat('1234567890', 2500000)); -- externally stored + END LOOP; +END $$; + +---------------------------------------------------------------- +-- 3. Verify Table Structure and Compression Settings +---------------------------------------------------------------- +-- Table Structure for cmdata_zstd +\d+ cmdata_zstd_nodict; + +-- Compression Settings for f1 Column +SELECT pg_column_compression(f1) AS compression_method, + count(*) AS row_count +FROM cmdata_zstd_nodict +GROUP BY pg_column_compression(f1); + +---------------------------------------------------------------- +-- 4. Decompression Tests +---------------------------------------------------------------- +-- Decompression Slice Test (Extracting Substrings) +SELECT SUBSTR(f1, 200, 50) AS data_slice +FROM cmdata_zstd_nodict; + +---------------------------------------------------------------- +-- 5. Test Table Creation with LIKE INCLUDING COMPRESSION +---------------------------------------------------------------- +DROP TABLE IF EXISTS cmdata_zstd_nodict_2; +CREATE TABLE cmdata_zstd_nodict_2 (LIKE cmdata_zstd_nodict INCLUDING COMPRESSION); +-- Table Structure for cmdata_zstd_2 +\d+ cmdata_zstd_nodict_2; +DROP TABLE cmdata_zstd_nodict_2; + +---------------------------------------------------------------- +-- 6. 
Materialized View Compression Test +---------------------------------------------------------------- +DROP MATERIALIZED VIEW IF EXISTS compressmv_zstd_nodict; +CREATE MATERIALIZED VIEW compressmv_zstd_nodict AS + SELECT f1 FROM cmdata_zstd_nodict; + +-- Materialized View Structure for compressmv_zstd +\d+ compressmv_zstd_nodict; + +-- Materialized View Compression Check +SELECT pg_column_compression(f1) AS mv_compression +FROM compressmv_zstd_nodict; + +---------------------------------------------------------------- +-- 7. Additional Updates and Round-Trip Tests +---------------------------------------------------------------- +-- Update some rows to check if the dictionary remains effective after modifications. +UPDATE cmdata_zstd_nodict +SET f1 = f1 || ' UPDATED'; + +-- Verification of Updated Rows +SELECT SUBSTR(f1, LENGTH(f1) - 7 + 1, 7) AS preview +FROM cmdata_zstd_nodict; +---------------------------------------------------------------- +-- 8. Clean Up +---------------------------------------------------------------- +DROP MATERIALIZED VIEW compressmv_zstd_nodict; +DROP TABLE cmdata_zstd_nodict; + +\set HIDE_TOAST_COMPRESSION true diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index a8346cda633a..9be747103b87 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -483,6 +483,7 @@ CompositeIOData CompositeTypeStmt CompoundAffixFlag CompressFileHandle +CompressionInfo CompressionLocation CompressorState ComputeXidHorizonsResult @@ -4097,7 +4098,6 @@ timeout_handler_proc timeout_params timerCA tlist_vinfo -toast_compress_header tokenize_error_callback_arg transferMode transfer_thread_arg