Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit fd15dba

Browse files
committed
Fix encode(...bytea..., 'escape') so that it converts all high-bit-set byte values into \nnn octal escape sequences. When the database encoding is multibyte this is *necessary* to avoid generating invalidly encoded text. Even in a single-byte encoding, the old behavior seems very hazardous --- consider for example what happens if the text is transferred to another database with a different encoding. Decoding would then yield some other bytea value than what was encoded, which is surely undesirable.

Per gripe from Hernan Gonzalez.

Backpatch to 8.3, but not further. This is a bit of a judgment call, but I make it on these grounds: pre-8.3 we don't really have much encoding safety anyway because of the convert() function family, and we would also have much higher risk of breaking existing apps that may not be expecting this behavior. 8.3 is still new enough that we can probably get away with making this change in the function's behavior.
1 parent bc93919 commit fd15dba

File tree

1 file changed

+21
-16
lines changed

1 file changed

+21
-16
lines changed

src/backend/utils/adt/encode.c

+21-16
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.20 2008/01/01 19:45:52 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.21 2008/02/26 02:54:08 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -26,7 +26,7 @@ struct pg_encoding
2626
unsigned (*decode) (const char *data, unsigned dlen, char *res);
2727
};
2828

29-
static struct pg_encoding *pg_find_encoding(const char *name);
29+
static const struct pg_encoding *pg_find_encoding(const char *name);
3030

3131
/*
3232
* SQL functions.
@@ -42,7 +42,7 @@ binary_encode(PG_FUNCTION_ARGS)
4242
int datalen,
4343
resultlen,
4444
res;
45-
struct pg_encoding *enc;
45+
const struct pg_encoding *enc;
4646

4747
datalen = VARSIZE(data) - VARHDRSZ;
4848

@@ -78,7 +78,7 @@ binary_decode(PG_FUNCTION_ARGS)
7878
int datalen,
7979
resultlen,
8080
res;
81-
struct pg_encoding *enc;
81+
const struct pg_encoding *enc;
8282

8383
datalen = VARSIZE(data) - VARHDRSZ;
8484

@@ -348,10 +348,13 @@ b64_dec_len(const char *src, unsigned srclen)
348348
* Minimally escape bytea to text.
349349
* De-escape text to bytea.
350350
*
351-
* Only two characters are escaped:
352-
* \0 (null) and \\ (backslash)
351+
* We must escape zero bytes and high-bit-set bytes to avoid generating
352+
* text that might be invalid in the current encoding, or that might
353+
* change to something else if passed through an encoding conversion
354+
* (leading to failing to de-escape to the original bytea value).
355+
* Also of course backslash itself has to be escaped.
353356
*
354-
* De-escapes \\ and any \### octal
357+
* De-escaping processes \\ and any \### octal
355358
*/
356359

357360
#define VAL(CH) ((CH) - '0')
@@ -366,16 +369,18 @@ esc_encode(const char *src, unsigned srclen, char *dst)
366369

367370
while (src < end)
368371
{
369-
if (*src == '\0')
372+
unsigned char c = (unsigned char) *src;
373+
374+
if (c == '\0' || IS_HIGHBIT_SET(c))
370375
{
371376
rp[0] = '\\';
372-
rp[1] = '0';
373-
rp[2] = '0';
374-
rp[3] = '0';
377+
rp[1] = DIG(c >> 6);
378+
rp[2] = DIG((c >> 3) & 7);
379+
rp[3] = DIG(c & 7);
375380
rp += 4;
376381
len += 4;
377382
}
378-
else if (*src == '\\')
383+
else if (c == '\\')
379384
{
380385
rp[0] = '\\';
381386
rp[1] = '\\';
@@ -384,7 +389,7 @@ esc_encode(const char *src, unsigned srclen, char *dst)
384389
}
385390
else
386391
{
387-
*rp++ = *src;
392+
*rp++ = c;
388393
len++;
389394
}
390395

@@ -450,7 +455,7 @@ esc_enc_len(const char *src, unsigned srclen)
450455

451456
while (src < end)
452457
{
453-
if (*src == '\0')
458+
if (*src == '\0' || IS_HIGHBIT_SET(*src))
454459
len += 4;
455460
else if (*src == '\\')
456461
len += 2;
@@ -510,7 +515,7 @@ esc_dec_len(const char *src, unsigned srclen)
510515
* Common
511516
*/
512517

513-
static struct
518+
static const struct
514519
{
515520
const char *name;
516521
struct pg_encoding enc;
@@ -543,7 +548,7 @@ static struct
543548
}
544549
};
545550

546-
static struct pg_encoding *
551+
static const struct pg_encoding *
547552
pg_find_encoding(const char *name)
548553
{
549554
int i;

0 commit comments

Comments
 (0)