
Commit e7ff596

For all ppc compilers, implement pg_atomic_fetch_add_ with inline asm.
This is more like how we handle s_lock.h and arch-x86.h.

This does not materially affect code generation for gcc 7.2.0 or xlc 13.1.3.

Reviewed by Tom Lane.

Discussion: https://postgr.es/m/20190831071157.GA3251746@rfd.leadboat.com
1 parent dd50f1a commit e7ff596
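
The technique, in brief: PowerPC has no single fetch-and-add instruction, so each operation loops on a load-reserve/store-conditional pair (lwarx/stwcx. for 32 bits, ldarx/stdcx. for 64), retrying until the reservation holds, bracketed by sync/isync for full-barrier semantics. As a semantic sketch only -- not the committed code, which hand-codes that loop in inline asm so gcc and xlc compile one identical implementation -- the operation computes the same result as a sequentially consistent __atomic_fetch_add:

    #include <stdint.h>

    /* Sketch only (not PostgreSQL source): what pg_atomic_fetch_add_u32_impl()
     * computes.  Returns the value *ptr held immediately before add_ was
     * applied. */
    static inline uint32_t
    fetch_add_u32_sketch(volatile uint32_t *ptr, int32_t add_)
    {
        return __atomic_fetch_add(ptr, (uint32_t) add_, __ATOMIC_SEQ_CST);
    }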

File tree

5 files changed: +161 -66 lines changed

configure (+40)

@@ -14593,6 +14593,46 @@ $as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
 
 $as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
 
+fi
+  # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
+$as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance... " >&6; }
+if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+static inline int
+   addi(int ra, int si)
+   {
+       int res = 0;
+       if (__builtin_constant_p(si))
+           __asm__ __volatile__(
+               " addi %0,%1,%2\n" : "=r"(res) : "r"(ra), "i"(si));
+       return res;
+   }
+   int test_adds(int x) { return addi(3, x) + addi(x, 5); }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_have_i_constraint__builtin_constant_p=yes
+else
+  pgac_cv_have_i_constraint__builtin_constant_p=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_i_constraint__builtin_constant_p" >&5
+$as_echo "$pgac_cv_have_i_constraint__builtin_constant_p" >&6; }
+if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+
+$as_echo "#define HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P 1" >>confdefs.h
+
 fi
 ;;
 esac

configure.in (+20)

@@ -1539,6 +1539,26 @@ case $host_cpu in
     if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
         AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
     fi
+  # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+  AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
+                 [pgac_cv_have_i_constraint__builtin_constant_p],
+  [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+  [static inline int
+   addi(int ra, int si)
+   {
+       int res = 0;
+       if (__builtin_constant_p(si))
+           __asm__ __volatile__(
+               " addi %0,%1,%2\n" : "=r"(res) : "r"(ra), "i"(si));
+       return res;
+   }
+   int test_adds(int x) { return addi(3, x) + addi(x, 5); }], [])],
+  [pgac_cv_have_i_constraint__builtin_constant_p=yes],
+  [pgac_cv_have_i_constraint__builtin_constant_p=no])])
+  if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+    AC_DEFINE(HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P, 1,
+              [Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance.])
+  fi
   ;;
 esac
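
Why this probe exists: addi takes a 16-bit signed immediate, and the "i" constraint requires an operand the compiler can prove constant at compile time. Whether __builtin_constant_p(x) being true is enough to make "i"(x) legal once the function is inlined with a literal argument varies by compiler, so the capability is tested rather than assumed. A minimal sketch of the pattern the resulting macro gates (hypothetical function; ppc-only asm, like the conftest above -- the real consumer is pg_atomic_fetch_add_u32_impl() in arch-ppc.h below, which uses PG_INT16_MAX/PG_INT16_MIN for the range check):

    /* Hypothetical consumer of HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P. */
    static inline int
    add_small(int base, int n)
    {
        int     res;

    #ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
        if (__builtin_constant_p(n) && n <= 32767 && n >= -32768)
        {
            /* n is provably constant and fits addi's immediate field. */
            __asm__ __volatile__(
                " addi %0,%1,%2\n" : "=r"(res) : "r"(base), "i"(n));
            return res;
        }
    #endif
        return base + n;        /* generic fallback */
    }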

src/include/pg_config.h.in (+3)

@@ -329,6 +329,9 @@
 /* Define to 1 if you have isinf(). */
 #undef HAVE_ISINF
 
+/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
+#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+
 /* Define to 1 if you have the <langinfo.h> header file. */
 #undef HAVE_LANGINFO_H

src/include/port/atomics/arch-ppc.h (+98)

@@ -25,5 +25,103 @@
 #define pg_write_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory")
 #endif
 
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#ifdef __64BIT__
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif /* __64BIT__ */
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	uint32 _t;
+	uint32 res;
+
+	/*
+	 * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
+	 * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
+	 * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
+	 * would add redundant syncs on xlc 12.
+	 */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %1,0,%4		\n"
+			"	addi    %0,%1,%3	\n"
+			"	stwcx.  %0,0,%4		\n"
+			"	bne     $-12		\n" /* branch to lwarx */
+			"	isync				\n"
+			: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+			: "i"(add_), "r"(&ptr->value)
+			: "memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %1,0,%4		\n"
+			"	add     %0,%1,%3	\n"
+			"	stwcx.  %0,0,%4		\n"
+			"	bne     $-12		\n" /* branch to lwarx */
+			"	isync				\n"
+			: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+			: "r"(add_), "r"(&ptr->value)
+			: "memory", "cc");
+
+	return res;
+}
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	uint64 _t;
+	uint64 res;
+
+	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %1,0,%4		\n"
+			"	addi    %0,%1,%3	\n"
+			"	stdcx.  %0,0,%4		\n"
+			"	bne     $-12		\n" /* branch to ldarx */
+			"	isync				\n"
+			: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+			: "i"(add_), "r"(&ptr->value)
+			: "memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %1,0,%4		\n"
+			"	add     %0,%1,%3	\n"
+			"	stdcx.  %0,0,%4		\n"
+			"	bne     $-12		\n" /* branch to ldarx */
+			"	isync				\n"
+			: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+			: "r"(add_), "r"(&ptr->value)
+			: "memory", "cc");
+
+	return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
 /* per architecture manual doubleword accesses have single copy atomicity */
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
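
For orientation, backend code never calls these _impl functions directly; it uses the wrappers declared in src/include/port/atomics.h. A short usage sketch (assuming a backend compilation unit): with a constant delta in the int16 range, ppc builds that define HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P resolve to the addi variant above, while a variable delta takes the add variant.

    #include "postgres.h"
    #include "port/atomics.h"

    static pg_atomic_uint32 counter;

    void
    bump_counter(void)
    {
        uint32      before;

        pg_atomic_init_u32(&counter, 0);

        /* Constant delta: eligible for the "i"-constraint (addi) path. */
        before = pg_atomic_fetch_add_u32(&counter, 1);
        Assert(before == 0);
    }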

src/include/port/atomics/generic-xlc.h (-66)

@@ -18,23 +18,6 @@
 
 #if defined(HAVE_ATOMICS)
 
-#define PG_HAVE_ATOMIC_U32_SUPPORT
-typedef struct pg_atomic_uint32
-{
-	volatile uint32 value;
-} pg_atomic_uint32;
-
-
-/* 64bit atomics are only supported in 64bit mode */
-#ifdef __64BIT__
-#define PG_HAVE_ATOMIC_U64_SUPPORT
-typedef struct pg_atomic_uint64
-{
-	volatile uint64 value pg_attribute_aligned(8);
-} pg_atomic_uint64;
-
-#endif /* __64BIT__ */
-
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
 static inline bool
 pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
@@ -69,33 +52,6 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 	return ret;
 }
 
-#define PG_HAVE_ATOMIC_FETCH_ADD_U32
-static inline uint32
-pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
-{
-	uint32 _t;
-	uint32 res;
-
-	/*
-	 * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
-	 * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
-	 * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
-	 * would add redundant syncs on xlc 12.
-	 */
-	__asm__ __volatile__(
-		"	sync				\n"
-		"	lwarx   %1,0,%4		\n"
-		"	add     %0,%1,%3	\n"
-		"	stwcx.  %0,0,%4		\n"
-		"	bne     $-12		\n" /* branch to lwarx */
-		"	isync				\n"
-		: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
-		: "r"(add_), "r"(&ptr->value)
-		: "memory", "cc");
-
-	return res;
-}
-
 #ifdef PG_HAVE_ATOMIC_U64_SUPPORT
 
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
@@ -115,28 +71,6 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 	return ret;
 }
 
-#define PG_HAVE_ATOMIC_FETCH_ADD_U64
-static inline uint64
-pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
-{
-	uint64 _t;
-	uint64 res;
-
-	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
-	__asm__ __volatile__(
-		"	sync				\n"
-		"	ldarx   %1,0,%4		\n"
-		"	add     %0,%1,%3	\n"
-		"	stdcx.  %0,0,%4		\n"
-		"	bne     $-12		\n" /* branch to ldarx */
-		"	isync				\n"
-		: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
-		: "r"(add_), "r"(&ptr->value)
-		: "memory", "cc");
-
-	return res;
-}
-
 #endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
 
 #endif /* defined(HAVE_ATOMICS) */
