Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2820adf

Browse files
committed
Support long distance matching for zstd compression
zstd compression supports a special mode for finding matches in the distant past, which may result in a better compression ratio, at the expense of using more memory (the window size is 128MB). To enable this optional mode, use the "long" keyword when specifying the compression method (--compress=zstd:long). Author: Justin Pryzby Reviewed-by: Tomas Vondra, Jacob Champion Discussion: https://postgr.es/m/20230224191840.GD1653@telsasoft.com Discussion: https://postgr.es/m/20220327205020.GM28503@telsasoft.com
1 parent 983ec23 commit 2820adf

File tree

12 files changed

+127
-6
lines changed

12 files changed

+127
-6
lines changed

doc/src/sgml/protocol.sgml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2729,7 +2729,8 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;"
27292729
level. Otherwise, it should be a comma-separated list of items,
27302730
each of the form <replaceable>keyword</replaceable> or
27312731
<replaceable>keyword=value</replaceable>. Currently, the supported
2732-
keywords are <literal>level</literal> and <literal>workers</literal>.
2732+
keywords are <literal>level</literal>, <literal>long</literal> and
2733+
<literal>workers</literal>.
27332734
</para>
27342735

27352736
<para>
@@ -2746,6 +2747,13 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;"
27462747
<literal>3</literal>).
27472748
</para>
27482749

2750+
<para>
2751+
The <literal>long</literal> keyword enables long-distance matching
2752+
mode, for improved compression ratio, at the expense of higher memory
2753+
use. Long-distance mode is supported only for
2754+
<literal>zstd</literal>.
2755+
</para>
2756+
27492757
<para>
27502758
The <literal>workers</literal> keyword sets the number of threads
27512759
that should be used for parallel compression. Parallel compression

doc/src/sgml/ref/pg_basebackup.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,8 +424,8 @@ PostgreSQL documentation
424424
level. Otherwise, it should be a comma-separated list of items,
425425
each of the form <literal>keyword</literal> or
426426
<literal>keyword=value</literal>.
427-
Currently, the supported keywords are <literal>level</literal>
428-
and <literal>workers</literal>.
427+
Currently, the supported keywords are <literal>level</literal>,
428+
<literal>long</literal>, and <literal>workers</literal>.
429429
The detail string cannot be used when the compression method
430430
is specified as a plain integer.
431431
</para>

doc/src/sgml/ref/pg_dump.sgml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,8 @@ PostgreSQL documentation
681681
as though it had been fed through <application>gzip</application>,
682682
<application>lz4</application>, or <application>zstd</application>;
683683
but the default is not to compress.
684+
With zstd compression, <literal>long</literal> mode may improve the
685+
compression ratio, at the cost of increased memory use.
684686
</para>
685687
<para>
686688
The tar archive format currently does not support compression at all.

src/backend/backup/basebackup_zstd.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,18 @@ bbsink_zstd_begin_backup(bbsink *sink)
118118
compress->workers, ZSTD_getErrorName(ret)));
119119
}
120120

121+
if ((compress->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0)
122+
{
123+
ret = ZSTD_CCtx_setParameter(mysink->cctx,
124+
ZSTD_c_enableLongDistanceMatching,
125+
compress->long_distance);
126+
if (ZSTD_isError(ret))
127+
ereport(ERROR,
128+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
129+
errmsg("could not set compression flag for %s: %s",
130+
"long", ZSTD_getErrorName(ret)));
131+
}
132+
121133
/*
122134
* We need our own buffer, because we're going to pass different data to
123135
* the next sink than what gets passed to us.

src/bin/pg_basebackup/bbstreamer_zstd.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,19 @@ bbstreamer_zstd_compressor_new(bbstreamer *next, pg_compress_specification *comp
106106
compress->workers, ZSTD_getErrorName(ret));
107107
}
108108

109+
if ((compress->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0)
110+
{
111+
ret = ZSTD_CCtx_setParameter(streamer->cctx,
112+
ZSTD_c_enableLongDistanceMatching,
113+
compress->long_distance);
114+
if (ZSTD_isError(ret))
115+
{
116+
pg_log_error("could not set compression flag for %s: %s",
117+
"long", ZSTD_getErrorName(ret));
118+
exit(1);
119+
}
120+
}
121+
109122
/* Initialize the ZSTD output buffer. */
110123
streamer->zstd_outBuf.dst = streamer->base.bbs_buffer.data;
111124
streamer->zstd_outBuf.size = streamer->base.bbs_buffer.maxlen;

src/bin/pg_basebackup/t/010_pg_basebackup.pl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,14 @@
139139
'gzip:workers=3',
140140
'invalid compression specification: compression algorithm "gzip" does not accept a worker count',
141141
'failure on worker count for gzip'
142-
],);
142+
],
143+
[
144+
'gzip:long',
145+
'invalid compression specification: compression algorithm "gzip" does not support long-distance mode',
146+
'failure on long mode for gzip'
147+
],
148+
);
149+
143150
for my $cft (@compression_failure_tests)
144151
{
145152
my $cfail = quotemeta($client_fails . $cft->[1]);

src/bin/pg_dump/compress_zstd.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ _ZstdCStreamParams(pg_compress_specification compress)
8080
_Zstd_CCtx_setParam_or_die(cstream, ZSTD_c_compressionLevel,
8181
compress.level, "level");
8282

83+
if (compress.options & PG_COMPRESSION_OPTION_LONG_DISTANCE)
84+
_Zstd_CCtx_setParam_or_die(cstream,
85+
ZSTD_c_enableLongDistanceMatching,
86+
compress.long_distance, "long");
87+
8388
return cstream;
8489
}
8590

src/bin/pg_dump/t/002_pg_dump.pl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,11 +267,12 @@
267267
],
268268
},
269269

270+
# Exercise long mode for test coverage
270271
compression_zstd_plain => {
271272
test_key => 'compression',
272273
compile_option => 'zstd',
273274
dump_cmd => [
274-
'pg_dump', '--format=plain', '--compress=zstd',
275+
'pg_dump', '--format=plain', '--compress=zstd:long',
275276
"--file=$tempdir/compression_zstd_plain.sql.zst", 'postgres',
276277
],
277278
# Decompress the generated file to run through the tests.

src/bin/pg_verifybackup/t/008_untar.pl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@
4949
'decompress_program' => $ENV{'ZSTD'},
5050
'decompress_flags' => ['-d'],
5151
'enabled' => check_pg_config("#define USE_ZSTD 1")
52+
},
53+
{
54+
'compression_method' => 'zstd',
55+
'backup_flags' => [ '--compress', 'server-zstd:level=1,long' ],
56+
'backup_archive' => 'base.tar.zst',
57+
'decompress_program' => $ENV{'ZSTD'},
58+
'decompress_flags' => ['-d'],
59+
'enabled' => check_pg_config("#define USE_ZSTD 1")
5260
});
5361

5462
for my $tc (@test_configuration)

src/bin/pg_verifybackup/t/010_client_untar.pl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@
5050
'decompress_flags' => ['-d'],
5151
'enabled' => check_pg_config("#define USE_ZSTD 1")
5252
},
53+
{
54+
'compression_method' => 'zstd',
55+
'backup_flags' => ['--compress', 'client-zstd:level=1,long'],
56+
'backup_archive' => 'base.tar.zst',
57+
'decompress_program' => $ENV{'ZSTD'},
58+
'decompress_flags' => [ '-d' ],
59+
'enabled' => check_pg_config("#define USE_ZSTD 1")
60+
},
5361
{
5462
'compression_method' => 'parallel zstd',
5563
'backup_flags' => [ '--compress', 'client-zstd:workers=3' ],

src/common/compression.c

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Otherwise, a compression specification is a comma-separated list of items,
1313
* each having the form keyword or keyword=value.
1414
*
15-
* Currently, the only supported keywords are "level" and "workers".
15+
* Currently, the supported keywords are "level", "long", and "workers".
1616
*
1717
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
1818
*
@@ -38,6 +38,8 @@
3838

3939
static int expect_integer_value(char *keyword, char *value,
4040
pg_compress_specification *result);
41+
static bool expect_boolean_value(char *keyword, char *value,
42+
pg_compress_specification *result);
4143

4244
/*
4345
* Look up a compression algorithm by name. Returns true and sets *algorithm
@@ -232,6 +234,11 @@ parse_compress_specification(pg_compress_algorithm algorithm, char *specificatio
232234
result->workers = expect_integer_value(keyword, value, result);
233235
result->options |= PG_COMPRESSION_OPTION_WORKERS;
234236
}
237+
else if (strcmp(keyword, "long") == 0)
238+
{
239+
result->long_distance = expect_boolean_value(keyword, value, result);
240+
result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE;
241+
}
235242
else
236243
result->parse_error =
237244
psprintf(_("unrecognized compression option: \"%s\""), keyword);
@@ -289,6 +296,43 @@ expect_integer_value(char *keyword, char *value, pg_compress_specification *resu
289296
return ivalue;
290297
}
291298

299+
/*
300+
* Parse 'value' as a boolean and return the result.
301+
*
302+
* If parsing fails, set result->parse_error to an appropriate message
303+
* and return false. The caller must check result->parse_error to determine if
304+
* the call was successful.
305+
*
306+
* Valid values are: yes, no, on, off, 1, 0 (matched case-insensitively). A missing value (NULL) is treated as true.
307+
*
308+
* Inspired by ParseVariableBool().
309+
*/
310+
static bool
311+
expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
312+
{
313+
if (value == NULL)
314+
return true;
315+
316+
if (pg_strcasecmp(value, "yes") == 0)
317+
return true;
318+
if (pg_strcasecmp(value, "on") == 0)
319+
return true;
320+
if (pg_strcasecmp(value, "1") == 0)
321+
return true;
322+
323+
if (pg_strcasecmp(value, "no") == 0)
324+
return false;
325+
if (pg_strcasecmp(value, "off") == 0)
326+
return false;
327+
if (pg_strcasecmp(value, "0") == 0)
328+
return false;
329+
330+
result->parse_error =
331+
psprintf(_("value for compression option \"%s\" must be a boolean"),
332+
keyword);
333+
return false;
334+
}
335+
292336
/*
293337
* Returns NULL if the compression specification string was syntactically
294338
* valid and semantically sensible. Otherwise, returns an error message.
@@ -354,6 +398,17 @@ validate_compress_specification(pg_compress_specification *spec)
354398
get_compress_algorithm_name(spec->algorithm));
355399
}
356400

401+
/*
402+
* Of the compression algorithms that we currently support, only zstd
403+
* supports long-distance mode.
404+
*/
405+
if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 &&
406+
(spec->algorithm != PG_COMPRESSION_ZSTD))
407+
{
408+
return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
409+
get_compress_algorithm_name(spec->algorithm));
410+
}
411+
357412
return NULL;
358413
}
359414

src/include/common/compression.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,15 @@ typedef enum pg_compress_algorithm
2727
} pg_compress_algorithm;
2828

2929
#define PG_COMPRESSION_OPTION_WORKERS (1 << 0)
30+
#define PG_COMPRESSION_OPTION_LONG_DISTANCE (1 << 1)
3031

3132
typedef struct pg_compress_specification
3233
{
3334
pg_compress_algorithm algorithm;
3435
unsigned options; /* OR of PG_COMPRESSION_OPTION constants */
3536
int level;
3637
int workers;
38+
bool long_distance;
3739
char *parse_error; /* NULL if parsing was OK, else message */
3840
} pg_compress_specification;
3941

0 commit comments

Comments
 (0)