Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndres Freund2025-03-17 22:51:33 +0000
committerAndres Freund2025-03-17 22:51:33 +0000
commit02844012b304ba80d1c48d51f6fe10bb622490cc (patch)
treec7753eb6c900a00ebdaa2311b87aefbb21d9f588 /src/backend
parent65db3963ae7154b8f01e4d73dc6b1ffd81c70e1e (diff)
aio: Basic subsystem initialization
This commit just does the minimal wiring up of the AIO subsystem, added in the next commit, to the rest of the system. The next commit contains more details about motivation and architecture. This commit is kept separate to make it easier to review, separating the changes across the tree, from the implementation of the new subsystem. We discussed squashing this commit with the main commit before merging AIO, but there has been a mild preference for keeping it separate. Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Noah Misch <noah@leadboat.com> Discussion: https://postgr.es/m/uvrtrknj4kdytuboidbhwclo4gxhswwcpgadptsjvjqcluzmah%40brqs62irg4dt
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/transam/xact.c12
-rw-r--r--src/backend/postmaster/autovacuum.c2
-rw-r--r--src/backend/postmaster/bgwriter.c2
-rw-r--r--src/backend/postmaster/checkpointer.c2
-rw-r--r--src/backend/postmaster/pgarch.c2
-rw-r--r--src/backend/postmaster/walsummarizer.c2
-rw-r--r--src/backend/postmaster/walwriter.c2
-rw-r--r--src/backend/replication/walsender.c2
-rw-r--r--src/backend/storage/aio/Makefile2
-rw-r--r--src/backend/storage/aio/aio.c90
-rw-r--r--src/backend/storage/aio/aio_init.c37
-rw-r--r--src/backend/storage/aio/meson.build2
-rw-r--r--src/backend/storage/ipc/ipci.c3
-rw-r--r--src/backend/utils/init/postinit.c7
-rw-r--r--src/backend/utils/misc/guc_tables.c23
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample6
-rw-r--r--src/backend/utils/resowner/resowner.c29
17 files changed, 225 insertions, 0 deletions
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 1b4f21a88d3..b885513f765 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -51,6 +51,7 @@
#include "replication/origin.h"
#include "replication/snapbuild.h"
#include "replication/syncrep.h"
+#include "storage/aio_subsys.h"
#include "storage/condition_variable.h"
#include "storage/fd.h"
#include "storage/lmgr.h"
@@ -2411,6 +2412,8 @@ CommitTransaction(void)
RESOURCE_RELEASE_BEFORE_LOCKS,
true, true);
+ AtEOXact_Aio(true);
+
/* Check we've released all buffer pins */
AtEOXact_Buffers(true);
@@ -2716,6 +2719,8 @@ PrepareTransaction(void)
RESOURCE_RELEASE_BEFORE_LOCKS,
true, true);
+ AtEOXact_Aio(true);
+
/* Check we've released all buffer pins */
AtEOXact_Buffers(true);
@@ -2830,6 +2835,8 @@ AbortTransaction(void)
pgstat_report_wait_end();
pgstat_progress_end_command();
+ pgaio_error_cleanup();
+
/* Clean up buffer content locks, too */
UnlockBuffers();
@@ -2960,6 +2967,7 @@ AbortTransaction(void)
ResourceOwnerRelease(TopTransactionResourceOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
+ AtEOXact_Aio(false);
AtEOXact_Buffers(false);
AtEOXact_RelationCache(false);
AtEOXact_TypeCache();
@@ -5232,6 +5240,9 @@ AbortSubTransaction(void)
pgstat_report_wait_end();
pgstat_progress_end_command();
+
+ pgaio_error_cleanup();
+
UnlockBuffers();
/* Reset WAL record construction state */
@@ -5326,6 +5337,7 @@ AbortSubTransaction(void)
RESOURCE_RELEASE_BEFORE_LOCKS,
false, false);
+ AtEOXact_Aio(false);
AtEOSubXact_RelationCache(false, s->subTransactionId,
s->parent->subTransactionId);
AtEOSubXact_TypeCache();
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 71c34027c88..2513a8ef8a6 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -88,6 +88,7 @@
#include "postmaster/autovacuum.h"
#include "postmaster/interrupt.h"
#include "postmaster/postmaster.h"
+#include "storage/aio_subsys.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/latch.h"
@@ -465,6 +466,7 @@ AutoVacLauncherMain(const void *startup_data, size_t startup_data_len)
*/
LWLockReleaseAll();
pgstat_report_wait_end();
+ pgaio_error_cleanup();
UnlockBuffers();
/* this is probably dead code, but let's be safe: */
if (AuxProcessResourceOwner)
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index a688cc5d2a1..72f5acceec7 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -38,6 +38,7 @@
#include "postmaster/auxprocess.h"
#include "postmaster/bgwriter.h"
#include "postmaster/interrupt.h"
+#include "storage/aio_subsys.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/condition_variable.h"
@@ -168,6 +169,7 @@ BackgroundWriterMain(const void *startup_data, size_t startup_data_len)
*/
LWLockReleaseAll();
ConditionVariableCancelSleep();
+ pgaio_error_cleanup();
UnlockBuffers();
ReleaseAuxProcessResources(false);
AtEOXact_Buffers(false);
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 0e228d143a0..fda91ffd1ce 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -49,6 +49,7 @@
#include "postmaster/bgwriter.h"
#include "postmaster/interrupt.h"
#include "replication/syncrep.h"
+#include "storage/aio_subsys.h"
#include "storage/bufmgr.h"
#include "storage/condition_variable.h"
#include "storage/fd.h"
@@ -276,6 +277,7 @@ CheckpointerMain(const void *startup_data, size_t startup_data_len)
LWLockReleaseAll();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
+ pgaio_error_cleanup();
UnlockBuffers();
ReleaseAuxProcessResources(false);
AtEOXact_Buffers(false);
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index dbe4e1d426b..7e622ae4bd2 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -40,6 +40,7 @@
#include "postmaster/interrupt.h"
#include "postmaster/pgarch.h"
#include "storage/condition_variable.h"
+#include "storage/aio_subsys.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/latch.h"
@@ -568,6 +569,7 @@ pgarch_archiveXlog(char *xlog)
LWLockReleaseAll();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
+ pgaio_error_cleanup();
ReleaseAuxProcessResources(false);
AtEOXact_Files(false);
AtEOXact_HashTables(false);
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index ccba0f84e6e..0fec4f1f871 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -38,6 +38,7 @@
#include "postmaster/interrupt.h"
#include "postmaster/walsummarizer.h"
#include "replication/walreceiver.h"
+#include "storage/aio_subsys.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/latch.h"
@@ -289,6 +290,7 @@ WalSummarizerMain(const void *startup_data, size_t startup_data_len)
LWLockReleaseAll();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
+ pgaio_error_cleanup();
ReleaseAuxProcessResources(false);
AtEOXact_Files(false);
AtEOXact_HashTables(false);
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 0380601bcbb..fd92c8b7a33 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -51,6 +51,7 @@
#include "postmaster/auxprocess.h"
#include "postmaster/interrupt.h"
#include "postmaster/walwriter.h"
+#include "storage/aio_subsys.h"
#include "storage/bufmgr.h"
#include "storage/condition_variable.h"
#include "storage/fd.h"
@@ -164,6 +165,7 @@ WalWriterMain(const void *startup_data, size_t startup_data_len)
LWLockReleaseAll();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
+ pgaio_error_cleanup();
UnlockBuffers();
ReleaseAuxProcessResources(false);
AtEOXact_Buffers(false);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index d96121b3aad..1028919aecb 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -79,6 +79,7 @@
#include "replication/walsender.h"
#include "replication/walsender_private.h"
#include "storage/condition_variable.h"
+#include "storage/aio_subsys.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pmsignal.h"
@@ -327,6 +328,7 @@ WalSndErrorCleanup(void)
LWLockReleaseAll();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
+ pgaio_error_cleanup();
if (xlogreader != NULL && xlogreader->seg.ws_file >= 0)
wal_segment_close(xlogreader);
diff --git a/src/backend/storage/aio/Makefile b/src/backend/storage/aio/Makefile
index 2f29a9ec4d1..eaeaeeee8e3 100644
--- a/src/backend/storage/aio/Makefile
+++ b/src/backend/storage/aio/Makefile
@@ -9,6 +9,8 @@ top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = \
+ aio.o \
+ aio_init.o \
read_stream.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
new file mode 100644
index 00000000000..828a94efdc3
--- /dev/null
+++ b/src/backend/storage/aio/aio.c
@@ -0,0 +1,90 @@
+/*-------------------------------------------------------------------------
+ *
+ * aio.c
+ * AIO - Core Logic
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/storage/aio/aio.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "lib/ilist.h"
+#include "storage/aio.h"
+#include "storage/aio_subsys.h"
+#include "utils/guc.h"
+#include "utils/guc_hooks.h"
+
+
+/* Options for io_method. */
+const struct config_enum_entry io_method_options[] = {
+ {"sync", IOMETHOD_SYNC, false},
+ {NULL, 0, false}
+};
+
+/* GUCs */
+int io_method = DEFAULT_IO_METHOD;
+int io_max_concurrency = -1;
+
+
+
+/*
+ * Release IO handle during resource owner cleanup.
+ *
+ * ioh_node is the list node of the handle, taken from the head of the
+ * resource owner's aio_handles list. on_error is true when the resource
+ * owner is being released for an abort rather than a commit.
+ *
+ * Currently a placeholder with an empty body.
+ *
+ * NOTE(review): the resource owner calls this in a loop until its
+ * aio_handles list is empty, so the eventual implementation presumably has
+ * to unlink ioh_node from that list - confirm when filling this in.
+ */
+void
+pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
+{
+}
+
+/*
+ * Perform AIO related cleanup after an error.
+ *
+ * This should be called early in the error recovery paths, as later steps may
+ * need to issue AIO (e.g. to record a transaction abort WAL record).
+ *
+ * Takes no arguments and returns nothing. Currently a placeholder with an
+ * empty body.
+ */
+void
+pgaio_error_cleanup(void)
+{
+}
+
+/*
+ * Perform AIO related checks at (sub-)transactional boundaries.
+ *
+ * This should be called late during (sub-)transactional commit/abort, after
+ * all steps that might need to perform AIO, so that we can verify that the
+ * AIO subsystem is in a valid state at the end of a transaction.
+ *
+ * is_commit is true when called from the commit/prepare paths and false when
+ * called from the (sub-)transaction abort paths. Currently a placeholder
+ * with an empty body, so the argument is unused.
+ */
+void
+AtEOXact_Aio(bool is_commit)
+{
+}
+
+/*
+ * GUC assign hook for io_method.
+ *
+ * Both arguments are currently ignored; the body is an empty placeholder.
+ */
+void
+assign_io_method(int newval, void *extra)
+{
+}
+
+/*
+ * GUC check hook for io_max_concurrency.
+ *
+ * Rejects 0 with an explanatory detail message; every other value, including
+ * the -1 "auto-tune" setting, is accepted unchanged. Returns true if
+ * *newval is acceptable, false otherwise.
+ */
+bool
+check_io_max_concurrency(int *newval, void **extra, GucSource source)
+{
+	/* Zero is the one value this hook refuses. */
+	if (*newval == 0)
+	{
+		GUC_check_errdetail("Only -1 or values bigger than 0 are valid.");
+		return false;
+	}
+
+	/*
+	 * Nothing more to validate here. In particular -1 is passed through
+	 * as-is: auto-tuning will be applied later during startup, as it depends
+	 * on the value of various GUCs.
+	 */
+	return true;
+}
diff --git a/src/backend/storage/aio/aio_init.c b/src/backend/storage/aio/aio_init.c
new file mode 100644
index 00000000000..aeacc144149
--- /dev/null
+++ b/src/backend/storage/aio/aio_init.c
@@ -0,0 +1,37 @@
+/*-------------------------------------------------------------------------
+ *
+ * aio_init.c
+ * AIO - Subsystem Initialization
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/storage/aio/aio_init.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "storage/aio_subsys.h"
+
+
+
+/*
+ * Report how much shared memory the AIO subsystem needs.
+ *
+ * Nothing is requested yet, so the result is always zero.
+ */
+Size
+AioShmemSize(void)
+{
+	return 0;
+}
+
+/*
+ * Shared memory initialization for the AIO subsystem, invoked alongside the
+ * other *ShmemInit() routines. Currently a placeholder with an empty body.
+ */
+void
+AioShmemInit(void)
+{
+}
+
+/*
+ * AIO initialization hook called from BaseInit(), ahead of the local
+ * initialization of the storage and buffer managers. Currently a placeholder
+ * with an empty body.
+ */
+void
+pgaio_init_backend(void)
+{
+}
diff --git a/src/backend/storage/aio/meson.build b/src/backend/storage/aio/meson.build
index 8abe0eb4863..c822fd4ddf7 100644
--- a/src/backend/storage/aio/meson.build
+++ b/src/backend/storage/aio/meson.build
@@ -1,5 +1,7 @@
# Copyright (c) 2024-2025, PostgreSQL Global Development Group
backend_sources += files(
+ 'aio.c',
+ 'aio_init.c',
'read_stream.c',
)
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 174eed70367..2fa045e6b0f 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -37,6 +37,7 @@
#include "replication/slotsync.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
+#include "storage/aio_subsys.h"
#include "storage/bufmgr.h"
#include "storage/dsm.h"
#include "storage/dsm_registry.h"
@@ -148,6 +149,7 @@ CalculateShmemSize(int *num_semaphores)
size = add_size(size, WaitEventCustomShmemSize());
size = add_size(size, InjectionPointShmemSize());
size = add_size(size, SlotSyncShmemSize());
+ size = add_size(size, AioShmemSize());
/* include additional requested shmem from preload libraries */
size = add_size(size, total_addin_request);
@@ -340,6 +342,7 @@ CreateOrAttachShmemStructs(void)
StatsShmemInit();
WaitEventCustomShmemInit();
InjectionPointShmemInit();
+ AioShmemInit();
}
/*
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 4b2faf1ba9d..7958ea11b73 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -43,6 +43,7 @@
#include "replication/slot.h"
#include "replication/slotsync.h"
#include "replication/walsender.h"
+#include "storage/aio_subsys.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
@@ -635,6 +636,12 @@ BaseInit(void)
*/
pgstat_initialize();
+ /*
+ * Initialize AIO before infrastructure that might need to actually
+ * execute AIO.
+ */
+ pgaio_init_backend();
+
/* Do local initialization of storage and buffer managers */
InitSync();
smgrinit();
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 9c0b10ad4dc..0d3ebf06a95 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -72,6 +72,7 @@
#include "replication/slot.h"
#include "replication/slotsync.h"
#include "replication/syncrep.h"
+#include "storage/aio.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/large_object.h"
@@ -3255,6 +3256,18 @@ struct config_int ConfigureNamesInt[] =
},
{
+ {"io_max_concurrency",
+ PGC_POSTMASTER,
+ RESOURCES_IO,
+ gettext_noop("Max number of IOs that one process can execute simultaneously."),
+ NULL,
+ },
+ &io_max_concurrency,
+ -1, -1, 1024,
+ check_io_max_concurrency, NULL, NULL
+ },
+
+ {
{"backend_flush_after", PGC_USERSET, RESOURCES_IO,
gettext_noop("Number of pages after which previously performed writes are flushed to disk."),
gettext_noop("0 disables forced writeback."),
@@ -5311,6 +5324,16 @@ struct config_enum ConfigureNamesEnum[] =
NULL, NULL, NULL
},
+ {
+ {"io_method", PGC_POSTMASTER, RESOURCES_IO,
+ gettext_noop("Selects the method for executing asynchronous I/O."),
+ NULL
+ },
+ &io_method,
+ DEFAULT_IO_METHOD, io_method_options,
+ NULL, assign_io_method, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 8de86e0c945..43c2ec2153e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -202,6 +202,12 @@
#maintenance_io_concurrency = 10 # 1-1000; 0 disables prefetching
#io_combine_limit = 128kB # usually 1-32 blocks (depends on OS)
+#io_method = sync # sync (change requires restart)
+#io_max_concurrency = -1 # Max number of IOs that one process
+ # can execute simultaneously
+ # -1 sets based on shared_buffers
+ # (change requires restart)
+
# - Worker Processes -
#max_worker_processes = 8 # (change requires restart)
diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c
index ac5ca4a765e..d39f3e1b655 100644
--- a/src/backend/utils/resowner/resowner.c
+++ b/src/backend/utils/resowner/resowner.c
@@ -47,6 +47,8 @@
#include "common/hashfn.h"
#include "common/int.h"
+#include "lib/ilist.h"
+#include "storage/aio.h"
#include "storage/ipc.h"
#include "storage/predicate.h"
#include "storage/proc.h"
@@ -155,6 +157,12 @@ struct ResourceOwnerData
/* The local locks cache. */
LOCALLOCK *locks[MAX_RESOWNER_LOCKS]; /* list of owned locks */
+
+ /*
+ * AIO handles need to be registered in critical sections and therefore
+ * cannot use the normal ResourceElem mechanism.
+ */
+ dlist_head aio_handles;
};
@@ -425,6 +433,8 @@ ResourceOwnerCreate(ResourceOwner parent, const char *name)
parent->firstchild = owner;
}
+ dlist_init(&owner->aio_handles);
+
return owner;
}
@@ -725,6 +735,13 @@ ResourceOwnerReleaseInternal(ResourceOwner owner,
* so issue warnings. In the abort case, just clean up quietly.
*/
ResourceOwnerReleaseAll(owner, phase, isCommit);
+
+ while (!dlist_is_empty(&owner->aio_handles))
+ {
+ dlist_node *node = dlist_head_node(&owner->aio_handles);
+
+ pgaio_io_release_resowner(node, !isCommit);
+ }
}
else if (phase == RESOURCE_RELEASE_LOCKS)
{
@@ -1082,3 +1099,15 @@ ResourceOwnerForgetLock(ResourceOwner owner, LOCALLOCK *locallock)
elog(ERROR, "lock reference %p is not owned by resource owner %s",
locallock, owner->name);
}
+
+
+/*
+ * Remember that an AIO handle is owned by a resource owner.
+ *
+ * Appends ioh_node to the tail of owner's dedicated aio_handles list.
+ */
+void
+ResourceOwnerRememberAioHandle(ResourceOwner owner, struct dlist_node *ioh_node)
+{
+ dlist_push_tail(&owner->aio_handles, ioh_node);
+}
+
+/*
+ * Forget that an AIO handle is owned by a resource owner.
+ *
+ * Unlinks ioh_node from owner's aio_handles list.
+ *
+ * NOTE(review): presumably ioh_node must currently be a member of that list
+ * - confirm against the dlist_delete_from() contract.
+ */
+void
+ResourceOwnerForgetAioHandle(ResourceOwner owner, struct dlist_node *ioh_node)
+{
+ dlist_delete_from(&owner->aio_handles, ioh_node);
+}