Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 38628db

Browse files
committed
Add memory barriers for PgBackendStatus.st_changecount protocol.
st_changecount protocol needs the memory barriers to ensure that the apparent order of execution is as it desires. Otherwise, for example, the CPU might rearrange the code so that st_changecount is incremented twice before the modification on a machine with weak memory ordering. This surprising result can lead to bugs. This commit introduces the macros to load and store st_changecount with the memory barriers. These are called before and after PgBackendStatus entries are modified or copied into private memory, in order to prevent CPU from reordering PgBackendStatus access. Per discussion on pgsql-hackers, we decided not to back-patch this to 9.4 or before until we get an actual bug report about this. Patch by me. Review by Robert Haas.
1 parent 19e065c commit 38628db

File tree

2 files changed

+71
-24
lines changed

2 files changed

+71
-24
lines changed

src/backend/postmaster/pgstat.c

+27-24
Original file line numberDiff line numberDiff line change
@@ -2563,7 +2563,7 @@ pgstat_bestart(void)
25632563
beentry = MyBEEntry;
25642564
do
25652565
{
2566-
beentry->st_changecount++;
2566+
pgstat_increment_changecount_before(beentry);
25672567
} while ((beentry->st_changecount & 1) == 0);
25682568

25692569
beentry->st_procpid = MyProcPid;
@@ -2588,8 +2588,7 @@ pgstat_bestart(void)
25882588
beentry->st_appname[NAMEDATALEN - 1] = '\0';
25892589
beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
25902590

2591-
beentry->st_changecount++;
2592-
Assert((beentry->st_changecount & 1) == 0);
2591+
pgstat_increment_changecount_after(beentry);
25932592

25942593
/* Update app name to current GUC setting */
25952594
if (application_name)
@@ -2624,12 +2623,11 @@ pgstat_beshutdown_hook(int code, Datum arg)
26242623
* before and after. We use a volatile pointer here to ensure the
26252624
* compiler doesn't try to get cute.
26262625
*/
2627-
beentry->st_changecount++;
2626+
pgstat_increment_changecount_before(beentry);
26282627

26292628
beentry->st_procpid = 0; /* mark invalid */
26302629

2631-
beentry->st_changecount++;
2632-
Assert((beentry->st_changecount & 1) == 0);
2630+
pgstat_increment_changecount_after(beentry);
26332631
}
26342632

26352633

@@ -2666,16 +2664,15 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
26662664
* non-disabled state. As our final update, change the state and
26672665
* clear fields we will not be updating anymore.
26682666
*/
2669-
beentry->st_changecount++;
2667+
pgstat_increment_changecount_before(beentry);
26702668
beentry->st_state = STATE_DISABLED;
26712669
beentry->st_state_start_timestamp = 0;
26722670
beentry->st_activity[0] = '\0';
26732671
beentry->st_activity_start_timestamp = 0;
26742672
/* st_xact_start_timestamp and st_waiting are also disabled */
26752673
beentry->st_xact_start_timestamp = 0;
26762674
beentry->st_waiting = false;
2677-
beentry->st_changecount++;
2678-
Assert((beentry->st_changecount & 1) == 0);
2675+
pgstat_increment_changecount_after(beentry);
26792676
}
26802677
return;
26812678
}
@@ -2695,7 +2692,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
26952692
/*
26962693
* Now update the status entry
26972694
*/
2698-
beentry->st_changecount++;
2695+
pgstat_increment_changecount_before(beentry);
26992696

27002697
beentry->st_state = state;
27012698
beentry->st_state_start_timestamp = current_timestamp;
@@ -2707,8 +2704,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
27072704
beentry->st_activity_start_timestamp = start_timestamp;
27082705
}
27092706

2710-
beentry->st_changecount++;
2711-
Assert((beentry->st_changecount & 1) == 0);
2707+
pgstat_increment_changecount_after(beentry);
27122708
}
27132709

27142710
/* ----------
@@ -2734,13 +2730,12 @@ pgstat_report_appname(const char *appname)
27342730
* st_changecount before and after. We use a volatile pointer here to
27352731
* ensure the compiler doesn't try to get cute.
27362732
*/
2737-
beentry->st_changecount++;
2733+
pgstat_increment_changecount_before(beentry);
27382734

27392735
memcpy((char *) beentry->st_appname, appname, len);
27402736
beentry->st_appname[len] = '\0';
27412737

2742-
beentry->st_changecount++;
2743-
Assert((beentry->st_changecount & 1) == 0);
2738+
pgstat_increment_changecount_after(beentry);
27442739
}
27452740

27462741
/*
@@ -2760,10 +2755,9 @@ pgstat_report_xact_timestamp(TimestampTz tstamp)
27602755
* st_changecount before and after. We use a volatile pointer here to
27612756
* ensure the compiler doesn't try to get cute.
27622757
*/
2763-
beentry->st_changecount++;
2758+
pgstat_increment_changecount_before(beentry);
27642759
beentry->st_xact_start_timestamp = tstamp;
2765-
beentry->st_changecount++;
2766-
Assert((beentry->st_changecount & 1) == 0);
2760+
pgstat_increment_changecount_after(beentry);
27672761
}
27682762

27692763
/* ----------
@@ -2839,7 +2833,10 @@ pgstat_read_current_status(void)
28392833
*/
28402834
for (;;)
28412835
{
2842-
int save_changecount = beentry->st_changecount;
2836+
int before_changecount;
2837+
int after_changecount;
2838+
2839+
pgstat_save_changecount_before(beentry, before_changecount);
28432840

28442841
localentry->backendStatus.st_procpid = beentry->st_procpid;
28452842
if (localentry->backendStatus.st_procpid > 0)
@@ -2856,8 +2853,9 @@ pgstat_read_current_status(void)
28562853
localentry->backendStatus.st_activity = localactivity;
28572854
}
28582855

2859-
if (save_changecount == beentry->st_changecount &&
2860-
(save_changecount & 1) == 0)
2856+
pgstat_save_changecount_after(beentry, after_changecount);
2857+
if (before_changecount == after_changecount &&
2858+
(before_changecount & 1) == 0)
28612859
break;
28622860

28632861
/* Make sure we can break out of loop if stuck... */
@@ -2927,12 +2925,17 @@ pgstat_get_backend_current_activity(int pid, bool checkUser)
29272925

29282926
for (;;)
29292927
{
2930-
int save_changecount = vbeentry->st_changecount;
2928+
int before_changecount;
2929+
int after_changecount;
2930+
2931+
pgstat_save_changecount_before(vbeentry, before_changecount);
29312932

29322933
found = (vbeentry->st_procpid == pid);
29332934

2934-
if (save_changecount == vbeentry->st_changecount &&
2935-
(save_changecount & 1) == 0)
2935+
pgstat_save_changecount_after(vbeentry, after_changecount);
2936+
2937+
if (before_changecount == after_changecount &&
2938+
(before_changecount & 1) == 0)
29362939
break;
29372940

29382941
/* Make sure we can break out of loop if stuck... */

src/include/pgstat.h

+44
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "libpq/pqcomm.h"
1717
#include "portability/instr_time.h"
1818
#include "postmaster/pgarch.h"
19+
#include "storage/barrier.h"
1920
#include "utils/hsearch.h"
2021
#include "utils/relcache.h"
2122

@@ -714,6 +715,12 @@ typedef struct PgBackendStatus
714715
* st_changecount again. If the value hasn't changed, and if it's even,
715716
* the copy is valid; otherwise start over. This makes updates cheap
716717
* while reads are potentially expensive, but that's the tradeoff we want.
718+
*
719+
* The above protocol needs the memory barriers to ensure that
720+
* the apparent order of execution is as it desires. Otherwise,
721+
* for example, the CPU might rearrange the code so that st_changecount
722+
* is incremented twice before the modification on a machine with
723+
* weak memory ordering. This surprising result can lead to bugs.
717724
*/
718725
int st_changecount;
719726

@@ -745,6 +752,43 @@ typedef struct PgBackendStatus
745752
char *st_activity;
746753
} PgBackendStatus;
747754

755+
/*
756+
* Macros to load and store st_changecount with the memory barriers.
757+
*
758+
* pgstat_increment_changecount_before() and
759+
* pgstat_increment_changecount_after() need to be called before and after
760+
* PgBackendStatus entries are modified, respectively. This makes sure that
761+
* st_changecount is incremented around the modification.
762+
*
763+
* Also pgstat_save_changecount_before() and pgstat_save_changecount_after()
764+
* need to be called before and after PgBackendStatus entries are copied into
765+
* private memory, respectively.
766+
*/
767+
#define pgstat_increment_changecount_before(beentry) \
768+
do { \
769+
beentry->st_changecount++; \
770+
pg_write_barrier(); \
771+
} while (0)
772+
773+
#define pgstat_increment_changecount_after(beentry) \
774+
do { \
775+
pg_write_barrier(); \
776+
beentry->st_changecount++; \
777+
Assert((beentry->st_changecount & 1) == 0); \
778+
} while (0)
779+
780+
#define pgstat_save_changecount_before(beentry, save_changecount) \
781+
do { \
782+
save_changecount = beentry->st_changecount; \
783+
pg_read_barrier(); \
784+
} while (0)
785+
786+
#define pgstat_save_changecount_after(beentry, save_changecount) \
787+
do { \
788+
pg_read_barrier(); \
789+
save_changecount = beentry->st_changecount; \
790+
} while (0)
791+
748792
/* ----------
749793
* LocalPgBackendStatus
750794
*

0 commit comments

Comments
 (0)