Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Add support for server-side LZ4 base backup compression.
authorRobert Haas <rhaas@postgresql.org>
Fri, 11 Feb 2022 13:29:38 +0000 (08:29 -0500)
committerRobert Haas <rhaas@postgresql.org>
Fri, 11 Feb 2022 13:29:38 +0000 (08:29 -0500)
LZ4 compression can be a lot faster than gzip compression, so users
may prefer it even if the compression ratio is not as good. We will
want pg_basebackup to support LZ4 compression and decompression on the
client side as well, and there is a pending patch for that, but it's
by a different author, so I am committing this part separately for
that reason.

Jeevan Ladhe, reviewed by Tushar Ahuja and by me.

Discussion: http://postgr.es/m/CANm22Cg9cArXEaYgHVZhCnzPLfqXCZLAzjwTq7Fc0quXRPfbxA@mail.gmail.com

doc/src/sgml/protocol.sgml
doc/src/sgml/ref/pg_basebackup.sgml
src/backend/replication/Makefile
src/backend/replication/basebackup.c
src/backend/replication/basebackup_lz4.c [new file with mode: 0644]
src/bin/pg_basebackup/pg_basebackup.c
src/bin/pg_verifybackup/Makefile
src/bin/pg_verifybackup/t/008_untar.pl
src/include/replication/basebackup_sink.h

index fd03c860bdf57552de5b85cb15e8e757909abd4f..1c5ab00879191aff0f32f5bfc59047e569a7b974 100644 (file)
@@ -2724,8 +2724,8 @@ The commands accepted in replication mode are:
         <listitem>
          <para>
           Instructs the server to compress the backup using the specified
-          method. Currently, the only supported method is
-          <literal>gzip</literal>.
+          method. Currently, the supported methods are <literal>gzip</literal>
+          and <literal>lz4</literal>.
          </para>
         </listitem>
        </varlistentry>
@@ -2736,7 +2736,8 @@ The commands accepted in replication mode are:
          <para>
           Specifies the compression level to be used. This should only be
           used in conjunction with the <literal>COMPRESSION</literal> option.
-          The value should be an integer between 1 and 9.
+          For <literal>gzip</literal> the value should be an integer between 1
+          and 9, and for <literal>lz4</literal> it should be between 1 and 12.
          </para>
         </listitem>
        </varlistentry>
index e7ae29ec3d36d8c0ff5b1de22f8fcf0bdfbd3021..7a1b432eba33bc63ccaf6ceda6c50bf2f261b3dc 100644 (file)
@@ -417,10 +417,13 @@ PostgreSQL documentation
         specify <literal>-Xfetch</literal>.
        </para>
        <para>
-        The compression method can be set to either <literal>gzip</literal>
-        for compression with <application>gzip</application>, or
-        <literal>none</literal> for no compression. A compression level
-        can be optionally specified, by appending the level number after a
+        The compression method can be set to <literal>gzip</literal> for
+        compression with <application>gzip</application>, or
+        <literal>lz4</literal> for compression with
+        <application>lz4</application>, or <literal>none</literal> for no
+        compression. However, <literal>lz4</literal> can currently only be
+        used with <literal>server</literal>. A compression level can be
+        optionally specified, by appending the level number after a
         colon (<literal>:</literal>). If no level is specified, the default
         compression level will be used. If only a level is specified without
         mentioning an algorithm, <literal>gzip</literal> compression will
@@ -428,12 +431,13 @@ PostgreSQL documentation
         used if the level is 0.
        </para>
        <para>
-        When the tar format is used, the suffix <filename>.gz</filename> will
-        automatically be added to all tar filenames. When the plain format is
-        used, client-side compression may not be specified, but it is
-        still possible to request server-side compression. If this is done,
-        the server will compress the backup for transmission, and the
-        client will decompress and extract it.
+        When the tar format is used with <literal>gzip</literal> or
+        <literal>lz4</literal>, the suffix <filename>.gz</filename> or
+        <filename>.lz4</filename> will automatically be added to all tar
+        filenames. When the plain format is used, client-side compression may
+        not be specified, but it is still possible to request server-side
+        compression. If this is done, the server will compress the backup for
+        transmission, and the client will decompress and extract it.
        </para>
       </listitem>
      </varlistentry>
index 8ec60ded7620b12528cdd0f96db3533268e0ec3c..74043ff331d53652ba77cb4f06a61266282acf73 100644 (file)
@@ -19,6 +19,7 @@ OBJS = \
    basebackup.o \
    basebackup_copy.o \
    basebackup_gzip.o \
+   basebackup_lz4.o \
    basebackup_progress.o \
    basebackup_server.o \
    basebackup_sink.o \
index fcd9161f749e21e27d9bf184854cfb57121c7f3f..0bf28b55d7f6f9f0cd36064cce8ca570231e2fff 100644 (file)
@@ -63,7 +63,8 @@ typedef enum
 typedef enum
 {
    BACKUP_COMPRESSION_NONE,
-   BACKUP_COMPRESSION_GZIP
+   BACKUP_COMPRESSION_GZIP,
+   BACKUP_COMPRESSION_LZ4
 } basebackup_compression_type;
 
 typedef struct
@@ -903,6 +904,8 @@ parse_basebackup_options(List *options, basebackup_options *opt)
                opt->compression = BACKUP_COMPRESSION_NONE;
            else if (strcmp(optval, "gzip") == 0)
                opt->compression = BACKUP_COMPRESSION_GZIP;
+           else if (strcmp(optval, "lz4") == 0)
+               opt->compression = BACKUP_COMPRESSION_LZ4;
            else
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
@@ -1021,6 +1024,8 @@ SendBaseBackup(BaseBackupCmd *cmd)
    /* Set up server-side compression, if client requested it */
    if (opt.compression == BACKUP_COMPRESSION_GZIP)
        sink = bbsink_gzip_new(sink, opt.compression_level);
+   else if (opt.compression == BACKUP_COMPRESSION_LZ4)
+       sink = bbsink_lz4_new(sink, opt.compression_level);
 
    /* Set up progress reporting. */
    sink = bbsink_progress_new(sink, opt.progress);
diff --git a/src/backend/replication/basebackup_lz4.c b/src/backend/replication/basebackup_lz4.c
new file mode 100644 (file)
index 0000000..8730ee8
--- /dev/null
@@ -0,0 +1,298 @@
+/*-------------------------------------------------------------------------
+ *
+ * basebackup_lz4.c
+ *   Basebackup sink implementing lz4 compression.
+ *
+ * Portions Copyright (c) 2010-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *   src/backend/replication/basebackup_lz4.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#ifdef HAVE_LIBLZ4
+#include <lz4frame.h>
+#endif
+
+#include "replication/basebackup_sink.h"
+
+#ifdef HAVE_LIBLZ4
+
+typedef struct bbsink_lz4
+{
+   /* Common sink state; kept first so a bbsink * can be cast to bbsink_lz4 *. */
+   bbsink      base;
+
+   /* Compression level; copied into prefs.compressionLevel at begin_backup. */
+   int         compresslevel;
+
+   LZ4F_compressionContext_t ctx;  /* per-archive context; NULL when none is open */
+   LZ4F_preferences_t prefs;       /* frame preferences, filled in at begin_backup */
+
+   /* Number of bytes staged in the next sink's buffer, not yet forwarded. */
+   size_t      bytes_written;
+} bbsink_lz4;
+
+static void bbsink_lz4_begin_backup(bbsink *sink);
+static void bbsink_lz4_begin_archive(bbsink *sink, const char *archive_name);
+static void bbsink_lz4_archive_contents(bbsink *sink, size_t avail_in);
+static void bbsink_lz4_manifest_contents(bbsink *sink, size_t len);
+static void bbsink_lz4_end_archive(bbsink *sink);
+static void bbsink_lz4_cleanup(bbsink *sink);
+
+const bbsink_ops bbsink_lz4_ops = {    /* callback table installed by bbsink_lz4_new() */
+   .begin_backup = bbsink_lz4_begin_backup,
+   .begin_archive = bbsink_lz4_begin_archive,
+   .archive_contents = bbsink_lz4_archive_contents,    /* compresses into the next sink's buffer */
+   .end_archive = bbsink_lz4_end_archive,
+   .begin_manifest = bbsink_forward_begin_manifest,    /* manifest is not compressed */
+   .manifest_contents = bbsink_lz4_manifest_contents,  /* copies manifest bytes across unmodified */
+   .end_manifest = bbsink_forward_end_manifest,
+   .end_backup = bbsink_forward_end_backup,
+   .cleanup = bbsink_lz4_cleanup   /* frees a still-open compression context */
+};
+#endif
+
+/*
+ * Create a new basebackup sink that lz4-compresses archive data before handing
+ * it to 'next'.  Levels 0 through 12 are accepted; anything else is an ERROR.
+ */
+bbsink *
+bbsink_lz4_new(bbsink *next, int compresslevel)
+{
+#ifndef HAVE_LIBLZ4
+   ereport(ERROR,
+           (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+            errmsg("lz4 compression is not supported by this build")));
+#else
+   bbsink_lz4 *sink;
+
+   Assert(next != NULL);
+
+   if (compresslevel < 0 || compresslevel > 12)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("lz4 compression level %d is out of range",
+                       compresslevel)));
+
+   sink = palloc0(sizeof(bbsink_lz4));     /* zeroed: ctx starts NULL, bytes_written 0 */
+   *((const bbsink_ops **) &sink->base.bbs_ops) = &bbsink_lz4_ops; /* cast presumably needed because bbs_ops is const -- confirm in basebackup_sink.h */
+   sink->base.bbs_next = next;
+   sink->compresslevel = compresslevel;
+
+   return &sink->base;
+#endif
+}
+
+#ifdef HAVE_LIBLZ4
+
+/*
+ * Begin backup: set up lz4 preferences, allocate our buffer, size the next's.
+ */
+static void
+bbsink_lz4_begin_backup(bbsink *sink)
+{
+   bbsink_lz4 *mysink = (bbsink_lz4 *) sink;
+   size_t      output_buffer_bound;
+   LZ4F_preferences_t *prefs = &mysink->prefs;
+
+   /* Initialize preferences: zero all, then set block size ID and level. */
+   memset(prefs, 0, sizeof(LZ4F_preferences_t));
+   prefs->frameInfo.blockSizeID = LZ4F_max256KB;
+   prefs->compressionLevel = mysink->compresslevel;
+
+   /*
+    * We need our own buffer, because we're going to pass different data to
+    * the next sink than what gets passed to us.
+    */
+   mysink->base.bbs_buffer = palloc(mysink->base.bbs_buffer_length);
+
+   /*
+    * LZ4F_compressUpdate() requires an output buffer at least as large as
+    * LZ4F_compressBound() of the input size, so ask the next sink for a
+    * bbs_buffer long enough to hold the compressed form of one full input
+    * buffer.
+    */
+   output_buffer_bound = LZ4F_compressBound(mysink->base.bbs_buffer_length,
+                                            &mysink->prefs);
+
+   /*
+    * The buffer length is expected to be a multiple of BLCKSZ, so round up.
+    */
+   output_buffer_bound = output_buffer_bound + BLCKSZ -
+       (output_buffer_bound % BLCKSZ);     /* adds a full BLCKSZ if already aligned */
+
+   bbsink_begin_backup(sink->bbs_next, sink->bbs_state, output_buffer_bound);
+}
+
+/*
+ * Prepare to compress the next archive: new context, frame header, new name.
+ */
+static void
+bbsink_lz4_begin_archive(bbsink *sink, const char *archive_name)
+{
+   bbsink_lz4 *mysink = (bbsink_lz4 *) sink;
+   char       *lz4_archive_name;
+   LZ4F_errorCode_t ctxError;
+   size_t      headerSize;
+
+   ctxError = LZ4F_createCompressionContext(&mysink->ctx, LZ4F_VERSION);   /* freed in end_archive or cleanup */
+   if (LZ4F_isError(ctxError))
+       elog(ERROR, "could not create lz4 compression context: %s",
+            LZ4F_getErrorName(ctxError));
+
+   /* Write the lz4 frame header at the start of the next sink's buffer. */
+   headerSize = LZ4F_compressBegin(mysink->ctx,
+                                   mysink->base.bbs_next->bbs_buffer,
+                                   mysink->base.bbs_next->bbs_buffer_length,
+                                   &mysink->prefs);
+
+   if (LZ4F_isError(headerSize))
+       elog(ERROR, "could not write lz4 header: %s",
+            LZ4F_getErrorName(headerSize));
+
+   /*
+    * Compressed data must be written after the header in the output buffer,
+    * so count the header as staged. bytes_written is zero at this point: it
+    * is zeroed at creation and reset at the end of every archive.
+    */
+   mysink->bytes_written += headerSize;
+
+   /* Add ".lz4" to the archive name. */
+   lz4_archive_name = psprintf("%s.lz4", archive_name);
+   Assert(sink->bbs_next != NULL);
+   bbsink_begin_archive(sink->bbs_next, lz4_archive_name);
+   pfree(lz4_archive_name);    /* temporary name is not used after the call above */
+}
+
+/*
+ * Compress one chunk of input (avail_in bytes from our own buffer) into the
+ * next sink's buffer. If the space left there is smaller than the worst-case
+ * compressed size of this chunk, first forward what is staged to the next sink.
+ *
+ * Note that since we're compressing the input, it may very commonly happen
+ * that we consume all the input data without filling the output buffer. In
+ * that case, the compressed representation of the current input data won't
+ * actually be sent to the next bbsink until a later call to this function,
+ * or perhaps even not until bbsink_lz4_end_archive() is invoked.
+ */
+static void
+bbsink_lz4_archive_contents(bbsink *sink, size_t avail_in)
+{
+   bbsink_lz4 *mysink = (bbsink_lz4 *) sink;
+   size_t      compressedSize;
+   size_t      avail_in_bound;
+
+   avail_in_bound = LZ4F_compressBound(avail_in, &mysink->prefs);
+
+   /*
+    * If the free space in the output buffer is smaller than the value
+    * computed by LZ4F_compressBound(), ask the next sink to process the data
+    * so that we can empty the buffer. Exactly-enough space needs no flush.
+    */
+   if ((mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written) <
+       avail_in_bound)
+   {
+       bbsink_archive_contents(sink->bbs_next, mysink->bytes_written);
+       mysink->bytes_written = 0;
+   }
+
+   /*
+    * Compress the input buffer, appending after any bytes already staged.
+    */
+   compressedSize = LZ4F_compressUpdate(mysink->ctx,
+                                        mysink->base.bbs_next->bbs_buffer + mysink->bytes_written,
+                                        mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written,
+                                        (uint8 *) mysink->base.bbs_buffer,
+                                        avail_in,
+                                        NULL);
+
+   if (LZ4F_isError(compressedSize))
+       elog(ERROR, "could not compress data: %s",
+            LZ4F_getErrorName(compressedSize));
+
+   /*
+    * Update our notion of how many bytes we've written into output buffer.
+    */
+   mysink->bytes_written += compressedSize;
+}
+
+/*
+ * There might be some data inside lz4's internal buffers; we need to get
+ * that flushed out and also finalize the lz4 frame and then get that forwarded
+ * to the successor sink as archive content.
+ *
+ * Then we free the compression context and end processing for this archive.
+ */
+static void
+bbsink_lz4_end_archive(bbsink *sink)
+{
+   bbsink_lz4 *mysink = (bbsink_lz4 *) sink;
+   size_t      compressedSize;
+   size_t      lz4_footer_bound;
+
+   lz4_footer_bound = LZ4F_compressBound(0, &mysink->prefs);  /* worst case for flush + frame end */
+
+   Assert(mysink->base.bbs_next->bbs_buffer_length >= lz4_footer_bound);
+
+   if ((mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written) <
+       lz4_footer_bound)       /* flush only when the footer cannot fit */
+   {
+       bbsink_archive_contents(sink->bbs_next, mysink->bytes_written);
+       mysink->bytes_written = 0;
+   }
+
+   compressedSize = LZ4F_compressEnd(mysink->ctx,
+                                     mysink->base.bbs_next->bbs_buffer + mysink->bytes_written,
+                                     mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written,
+                                     NULL);
+
+   if (LZ4F_isError(compressedSize))
+       elog(ERROR, "could not end lz4 compression: %s",
+            LZ4F_getErrorName(compressedSize));
+
+   /* Update our notion of how many bytes we've written. */
+   mysink->bytes_written += compressedSize;
+
+   /* Send whatever accumulated output bytes we have. */
+   bbsink_archive_contents(sink->bbs_next, mysink->bytes_written);
+   mysink->bytes_written = 0;
+
+   /* Release the context; NULL it so cleanup does not free it again. */
+   LZ4F_freeCompressionContext(mysink->ctx);
+   mysink->ctx = NULL;
+
+   /* Pass on the information that this archive has ended. */
+   bbsink_forward_end_archive(sink);
+}
+
+/*
+ * Manifest contents are not compressed, but since we have our own buffer the
+ * 'len' bytes in it must be copied into the successor sink's buffer first.
+ */
+static void
+bbsink_lz4_manifest_contents(bbsink *sink, size_t len)
+{
+   memcpy(sink->bbs_next->bbs_buffer, sink->bbs_buffer, len);  /* assumes len fits the next buffer -- confirm */
+   bbsink_manifest_contents(sink->bbs_next, len);
+}
+
+/*
+ * Cleanup callback: if a compression context is still open (for instance
+ * because the backup failed mid-archive), free it so it is not leaked.
+ */
+static void
+bbsink_lz4_cleanup(bbsink *sink)
+{
+   bbsink_lz4 *lz4sink = (bbsink_lz4 *) sink;
+
+   if (lz4sink->ctx == NULL)
+       return;
+
+   LZ4F_freeCompressionContext(lz4sink->ctx);
+   lz4sink->ctx = NULL;
+}
+
+#endif
index c40925c1f0435d388569b772cc4d1da1350dd299..923659ddee5f5dc6fdc69136df85e85397ab2fc7 100644 (file)
@@ -391,7 +391,7 @@ usage(void)
    printf(_("  -X, --wal-method=none|fetch|stream\n"
             "                         include required WAL files with specified method\n"));
    printf(_("  -z, --gzip             compress tar output\n"));
-   printf(_("  -Z, --compress={[{client,server}-]gzip,none}[:LEVEL] or [LEVEL]\n"
+   printf(_("  -Z, --compress={[{client,server}-]gzip,lz4,none}[:LEVEL] or [LEVEL]\n"
             "                         compress tar output with given compression method or level\n"));
    printf(_("\nGeneral options:\n"));
    printf(_("  -c, --checkpoint=fast|spread\n"
@@ -1003,6 +1003,11 @@ parse_compress_options(char *src, WalCompressionMethod *methodres,
        *methodres = COMPRESSION_GZIP;
        *locationres = COMPRESS_LOCATION_SERVER;
    }
+   else if (pg_strcasecmp(firstpart, "server-lz4") == 0)
+   {
+       *methodres = COMPRESSION_LZ4;
+       *locationres = COMPRESS_LOCATION_SERVER;
+   }
    else if (pg_strcasecmp(firstpart, "none") == 0)
    {
        *methodres = COMPRESSION_NONE;
@@ -1930,6 +1935,9 @@ BaseBackup(void)
            case COMPRESSION_GZIP:
                compressmethodstr = "gzip";
                break;
+           case COMPRESSION_LZ4:
+               compressmethodstr = "lz4";
+               break;
            default:
                Assert(false);
                break;
@@ -2772,8 +2780,12 @@ main(int argc, char **argv)
            }
            break;
        case COMPRESSION_LZ4:
-           /* option not supported */
-           Assert(false);
+           if (compresslevel > 12)
+           {
+               pg_log_error("compression level %d of method %s higher than maximum of 12",
+                            compresslevel, "lz4");
+               exit(1);
+           }
            break;
    }
 
index 1ae818f9a112ceb67bb4a5610772b05617e48fac..851233a6e0eb7d42849972b0dfd12b2c55df8292 100644 (file)
@@ -9,6 +9,7 @@ export TAR
 # used by the command "gzip" to pass down options, so stick with a different
 # name.
 export GZIP_PROGRAM=$(GZIP)
+export LZ4=$(LZ4)
 
 subdir = src/bin/pg_verifybackup
 top_builddir = ../../..
index d32c86e92e5e2fed0a974b889fb657066396f9ef..9d5b0e139a2dd2da29ded933a5c95800d0ae22ab 100644 (file)
@@ -11,7 +11,7 @@ use Config;
 use File::Path qw(rmtree);
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
-use Test::More tests => 6;
+use Test::More tests => 9;
 
 my $primary = PostgreSQL::Test::Cluster->new('primary');
 $primary->init(allows_streaming => 1);
@@ -35,6 +35,14 @@ my @test_configuration = (
        'decompress_program' => $ENV{'GZIP_PROGRAM'},
        'decompress_flags' => [ '-d' ],
        'enabled' => check_pg_config("#define HAVE_LIBZ 1")
+   },
+   {
+       'compression_method' => 'lz4',
+       'backup_flags' => ['--compress', 'server-lz4'],
+       'backup_archive' => 'base.tar.lz4',
+       'decompress_program' => $ENV{'LZ4'},
+       'decompress_flags' => [ '-d', '-m'],
+       'enabled' => check_pg_config("#define HAVE_LIBLZ4 1")
    }
 );
 
index 2cfa816bb81604852f7e8779767ef9c26f4cdb8c..a3f8d3725825f3223f5df98731df648672a3dfc1 100644 (file)
@@ -284,6 +284,7 @@ extern void bbsink_forward_cleanup(bbsink *sink);
 /* Constructors for various types of sinks. */
 extern bbsink *bbsink_copystream_new(bool send_to_client);
 extern bbsink *bbsink_gzip_new(bbsink *next, int compresslevel);
+extern bbsink *bbsink_lz4_new(bbsink *next, int compresslevel);
 extern bbsink *bbsink_progress_new(bbsink *next, bool estimate_backup_size);
 extern bbsink *bbsink_server_new(bbsink *next, char *pathname);
 extern bbsink *bbsink_throttle_new(bbsink *next, uint32 maxrate);