Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 44fe30f

Browse files
Add default_char_signedness field to ControlFileData.
The signedness of the 'char' type in C is implementation-dependent. For instance, 'signed char' is used by default on x86 CPUs, while 'unsigned char' is used on aarch CPUs. Previously, we accidentally let C implementation signedness affect persistent data. This led to inconsistent results when comparing char data across different platforms. This commit introduces a new 'default_char_signedness' field in ControlFileData to store the signedness of the 'char' type. While this change does not encourage the use of 'char' without explicitly specifying its signedness, this field can be used as a hint to ensure consistent behavior for pre-v18 data files that store data sorted by the 'char' type on disk (e.g., GIN and GiST indexes), especially in cross-platform replication scenarios. Newly created database clusters unconditionally set the default char signedness to true. pg_upgrade (with an upcoming commit) changes this flag for clusters if the source database cluster has signedness=false. As a result, signedness=false setting will become rare over time. If we had known about the problem during the last development cycle that forced initdb (v8.3), we would have made all clusters signed or all clusters unsigned. Making pg_upgrade the only source of signedness=false will cause the population of database clusters to converge toward that retrospective ideal. Bump catalog version (for the catalog changes) and PG_CONTROL_VERSION (for the additions in ControlFileData). Reviewed-by: Noah Misch <noah@leadboat.com> Discussion: https://postgr.es/m/CB11ADBC-0C3F-4FE0-A678-666EE80CBB07%40amazon.com
1 parent 901a1cf commit 44fe30f

File tree

8 files changed

+64
-7
lines changed

8 files changed

+64
-7
lines changed

doc/src/sgml/func.sgml

+5
Original file line numberDiff line numberDiff line change
@@ -27991,6 +27991,11 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres}
2799127991
<entry><type>integer</type></entry>
2799227992
</row>
2799327993

27994+
<row>
27995+
<entry><structfield>default_char_signedness</structfield></entry>
27996+
<entry><type>boolean</type></entry>
27997+
</row>
27998+
2799427999
</tbody>
2799528000
</tgroup>
2799628001
</table>

src/backend/access/transam/xlog.c

+40
Original file line numberDiff line numberDiff line change
@@ -4284,6 +4284,33 @@ WriteControlFile(void)
42844284

42854285
ControlFile->float8ByVal = FLOAT8PASSBYVAL;
42864286

4287+
/*
4288+
* Initialize the default 'char' signedness.
4289+
*
4290+
* The signedness of the char type is implementation-defined. For instance
4291+
* on x86 architecture CPUs, the char data type is typically treated as
4292+
* signed by default, whereas on aarch architecture CPUs, it is typically
4293+
* treated as unsigned by default. In v17 or earlier, we accidentally let
4294+
* C implementation signedness affect persistent data. This led to
4295+
* inconsistent results when comparing char data across different
4296+
* platforms.
4297+
*
4298+
* This flag can be used as a hint to ensure consistent behavior for
4299+
* pre-v18 data files that store data sorted by the 'char' type on disk,
4300+
* especially in cross-platform replication scenarios.
4301+
*
4302+
* Newly created database clusters unconditionally set the default char
4303+
* signedness to true. pg_upgrade changes this flag for clusters that were
4304+
* initialized on signedness=false platforms. As a result,
4305+
* signedness=false setting will become rare over time. If we had known
4306+
* about this problem during the last development cycle that forced initdb
4307+
* (v8.3), we would have made all clusters signed or all clusters
4308+
* unsigned. Making pg_upgrade the only source of signedness=false will
4309+
* cause the population of database clusters to converge toward that
4310+
* retrospective ideal.
4311+
*/
4312+
ControlFile->default_char_signedness = true;
4313+
42874314
/* Contents are protected with a CRC */
42884315
INIT_CRC32C(ControlFile->crc);
42894316
COMP_CRC32C(ControlFile->crc,
@@ -4612,6 +4639,19 @@ DataChecksumsEnabled(void)
46124639
return (ControlFile->data_checksum_version > 0);
46134640
}
46144641

4642+
/*
4643+
* Return true if the cluster was initialized on a platform where the
4644+
* default signedness of char is "signed". This function exists for code
4645+
* that deals with pre-v18 data files that store data sorted by the 'char'
4646+
* type on disk (e.g., GIN and GiST indexes). See the comments in
4647+
* WriteControlFile() for details.
4648+
*/
4649+
bool
4650+
GetDefaultCharSignedness(void)
4651+
{
4652+
return ControlFile->default_char_signedness;
4653+
}
4654+
46154655
/*
46164656
* Returns a fake LSN for unlogged relations.
46174657
*

src/backend/utils/misc/pg_controldata.c

+5-2
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,8 @@ pg_control_recovery(PG_FUNCTION_ARGS)
203203
Datum
204204
pg_control_init(PG_FUNCTION_ARGS)
205205
{
206-
Datum values[11];
207-
bool nulls[11];
206+
Datum values[12];
207+
bool nulls[12];
208208
TupleDesc tupdesc;
209209
HeapTuple htup;
210210
ControlFileData *ControlFile;
@@ -254,6 +254,9 @@ pg_control_init(PG_FUNCTION_ARGS)
254254
values[10] = Int32GetDatum(ControlFile->data_checksum_version);
255255
nulls[10] = false;
256256

257+
values[11] = BoolGetDatum(ControlFile->default_char_signedness);
258+
nulls[11] = false;
259+
257260
htup = heap_form_tuple(tupdesc, values, nulls);
258261

259262
PG_RETURN_DATUM(HeapTupleGetDatum(htup));

src/bin/pg_controldata/pg_controldata.c

+2
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,8 @@ main(int argc, char *argv[])
336336
(ControlFile->float8ByVal ? _("by value") : _("by reference")));
337337
printf(_("Data page checksum version: %u\n"),
338338
ControlFile->data_checksum_version);
339+
printf(_("Default char data signedness: %s\n"),
340+
(ControlFile->default_char_signedness ? _("signed") : _("unsigned")));
339341
printf(_("Mock authentication nonce: %s\n"),
340342
mock_auth_nonce_str);
341343
return 0;

src/include/access/xlog.h

+1
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ extern XLogRecPtr GetXLogWriteRecPtr(void);
231231
extern uint64 GetSystemIdentifier(void);
232232
extern char *GetMockAuthenticationNonce(void);
233233
extern bool DataChecksumsEnabled(void);
234+
extern bool GetDefaultCharSignedness(void);
234235
extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
235236
extern Size XLOGShmemSize(void);
236237
extern void XLOGShmemInit(void);

src/include/catalog/catversion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/* yyyymmddN */
60-
#define CATALOG_VERSION_NO 202502211
60+
#define CATALOG_VERSION_NO 202502212
6161

6262
#endif

src/include/catalog/pg_control.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323

2424
/* Version identifier for this pg_control format */
25-
#define PG_CONTROL_VERSION 1700
25+
#define PG_CONTROL_VERSION 1800
2626

2727
/* Nonce key length, see below */
2828
#define MOCK_AUTH_NONCE_LEN 32
@@ -221,6 +221,12 @@ typedef struct ControlFileData
221221
/* Are data pages protected by checksums? Zero if no checksum version */
222222
uint32 data_checksum_version;
223223

224+
/*
225+
* True if the default signedness of char is "signed" on a platform where
226+
* the cluster is initialized.
227+
*/
228+
bool default_char_signedness;
229+
224230
/*
225231
* Random nonce, used in authentication requests that need to proceed
226232
* based on values that are cluster-unique, like a SASL exchange that

src/include/catalog/pg_proc.dat

+3-3
Original file line numberDiff line numberDiff line change
@@ -12206,9 +12206,9 @@
1220612206
descr => 'pg_controldata init state information as a function',
1220712207
proname => 'pg_control_init', provolatile => 'v', prorettype => 'record',
1220812208
proargtypes => '',
12209-
proallargtypes => '{int4,int4,int4,int4,int4,int4,int4,int4,int4,bool,int4}',
12210-
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
12211-
proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version}',
12209+
proallargtypes => '{int4,int4,int4,int4,int4,int4,int4,int4,int4,bool,int4,bool}',
12210+
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
12211+
proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version,default_char_signedness}',
1221212212
prosrc => 'pg_control_init' },
1221312213

1221412214
# subscripting support for built-in types

0 commit comments

Comments
 (0)