Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit eb0a773

Browse files
committed
Perform post-escaping encoding validity checks on SQL literals and COPY input
so that invalidly encoded data cannot enter the database by these means.
1 parent 22b613e commit eb0a773

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

src/backend/commands/copy.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.286 2007/09/07 20:59:26 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.287 2007/09/12 20:49:27 adunstan Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -2685,6 +2685,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
26852685
char *start_ptr;
26862686
char *end_ptr;
26872687
int input_len;
2688+
bool saw_high_bit = false;
26882689

26892690
/* Make sure space remains in fieldvals[] */
26902691
if (fieldno >= maxfields)
@@ -2749,6 +2750,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
27492750
}
27502751
}
27512752
c = val & 0377;
2753+
if (IS_HIGHBIT_SET(c))
2754+
saw_high_bit = true;
27522755
}
27532756
break;
27542757
case 'x':
@@ -2772,6 +2775,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
27722775
}
27732776
}
27742777
c = val & 0xff;
2778+
if (IS_HIGHBIT_SET(c))
2779+
saw_high_bit = true;
27752780
}
27762781
}
27772782
break;
@@ -2799,7 +2804,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
27992804
* literally
28002805
*/
28012806
}
2802-
}
2807+
}
28032808

28042809
/* Add c to output string */
28052810
*output_ptr++ = c;
@@ -2808,6 +2813,16 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
28082813
/* Terminate attribute value in output area */
28092814
*output_ptr++ = '\0';
28102815

2816+
/* If we de-escaped a char with the high bit set, make sure
2817+
* we still have valid data for the db encoding. Avoid calling strlen
2818+
* here for the sake of efficiency.
2819+
*/
2820+
if (saw_high_bit)
2821+
{
2822+
char *fld = fieldvals[fieldno];
2823+
pg_verifymbstr(fld, output_ptr - (fld + 1), false);
2824+
}
2825+
28112826
/* Check whether raw input matched null marker */
28122827
input_len = end_ptr - start_ptr;
28132828
if (input_len == cstate->null_print_len &&

src/backend/parser/scan.l

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
* Portions Copyright (c) 1994, Regents of the University of California
2525
*
2626
* IDENTIFICATION
27-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.140 2007/08/12 20:18:06 tgl Exp $
27+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.141 2007/09/12 20:49:27 adunstan Exp $
2828
*
2929
*-------------------------------------------------------------------------
3030
*/
@@ -60,6 +60,7 @@ bool escape_string_warning = true;
6060
bool standard_conforming_strings = false;
6161

6262
static bool warn_on_first_escape;
63+
static bool saw_high_bit = false;
6364

6465
/*
6566
* literalbuf is used to accumulate literal values when multiple rules
@@ -426,6 +427,7 @@ other .
426427

427428
{xqstart} {
428429
warn_on_first_escape = true;
430+
saw_high_bit = false;
429431
SET_YYLLOC();
430432
if (standard_conforming_strings)
431433
BEGIN(xq);
@@ -435,6 +437,7 @@ other .
435437
}
436438
{xestart} {
437439
warn_on_first_escape = false;
440+
saw_high_bit = false;
438441
SET_YYLLOC();
439442
BEGIN(xe);
440443
startlit();
@@ -443,6 +446,11 @@ other .
443446
<xq,xe>{quotefail} {
444447
yyless(1);
445448
BEGIN(INITIAL);
449+
/* check that the data remains valid if it might have been
450+
* made invalid by unescaping any chars.
451+
*/
452+
if (saw_high_bit)
453+
pg_verifymbstr(literalbuf, literallen, false);
446454
yylval.str = litbufdup();
447455
return SCONST;
448456
}
@@ -475,12 +483,16 @@ other .
475483

476484
check_escape_warning();
477485
addlitchar(c);
486+
if (IS_HIGHBIT_SET(c))
487+
saw_high_bit = true;
478488
}
479489
<xe>{xehexesc} {
480490
unsigned char c = strtoul(yytext+2, NULL, 16);
481491

482492
check_escape_warning();
483493
addlitchar(c);
494+
if (IS_HIGHBIT_SET(c))
495+
saw_high_bit = true;
484496
}
485497
<xq,xe>{quotecontinue} {
486498
/* ignore */
@@ -892,6 +904,14 @@ litbufdup(void)
892904
static unsigned char
893905
unescape_single_char(unsigned char c)
894906
{
907+
/* Normally we wouldn't expect to see a backslash followed by a byte that has its high bit set,
908+
* but we set the flag to check the string if we do get it, so
909+
that this doesn't become a way of getting around the encoding validity
910+
* checks.
911+
*/
912+
if (IS_HIGHBIT_SET(c))
913+
saw_high_bit = true;
914+
895915
switch (c)
896916
{
897917
case 'b':

0 commit comments

Comments
 (0)