Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 43a7723

Browse files
committed
Fix handling of invalidly encoded data in escaping functions
Previously, invalidly encoded input to various escaping functions could lead to the escaped string getting incorrectly parsed by psql. To be safe, escaping functions need to ensure that neither invalid nor incomplete multi-byte characters can be used to "escape" from being quoted.

Functions which can report errors now return an error in more cases than before. Functions that cannot report errors now replace invalid input bytes with a byte sequence that cannot be used to escape the quotes and that is guaranteed to error out when a query is sent to the server.

The following functions are fixed by this commit:
- PQescapeLiteral()
- PQescapeIdentifier()
- PQescapeString()
- PQescapeStringConn()
- fmtId()
- appendStringLiteral()

Reported-by: Stephen Fewer <stephen_fewer@rapid7.com>
Reviewed-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Backpatch-through: 13
Security: CVE-2025-1094
1 parent 61ad93c commit 43a7723

File tree

2 files changed

+236
-68
lines changed

2 files changed

+236
-68
lines changed

src/fe_utils/string_utils.c

+136-34
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ fmtIdEnc(const char *rawid, int encoding)
104104

105105
const char *cp;
106106
bool need_quotes = false;
107+
size_t remaining = strlen(rawid);
107108

108109
/*
109110
* These checks need to match the identifier production in scan.l. Don't
@@ -117,7 +118,8 @@ fmtIdEnc(const char *rawid, int encoding)
117118
else
118119
{
119120
/* otherwise check the entire string */
120-
for (cp = rawid; *cp; cp++)
121+
cp = rawid;
122+
for (size_t i = 0; i < remaining; i++, cp++)
121123
{
122124
if (!((*cp >= 'a' && *cp <= 'z')
123125
|| (*cp >= '0' && *cp <= '9')
@@ -153,17 +155,90 @@ fmtIdEnc(const char *rawid, int encoding)
153155
else
154156
{
155157
appendPQExpBufferChar(id_return, '"');
156-
for (cp = rawid; *cp; cp++)
158+
159+
cp = &rawid[0];
160+
while (remaining > 0)
157161
{
158-
/*
159-
* Did we find a double-quote in the string? Then make this a
160-
* double double-quote per SQL99. Before, we put in a
161-
* backslash/double-quote pair. - thomas 2000-08-05
162-
*/
163-
if (*cp == '"')
164-
appendPQExpBufferChar(id_return, '"');
165-
appendPQExpBufferChar(id_return, *cp);
162+
int charlen;
163+
164+
/* Fast path for plain ASCII */
165+
if (!IS_HIGHBIT_SET(*cp))
166+
{
167+
/*
168+
* Did we find a double-quote in the string? Then make this a
169+
* double double-quote per SQL99. Before, we put in a
170+
* backslash/double-quote pair. - thomas 2000-08-05
171+
*/
172+
if (*cp == '"')
173+
appendPQExpBufferChar(id_return, '"');
174+
appendPQExpBufferChar(id_return, *cp);
175+
remaining--;
176+
cp++;
177+
continue;
178+
}
179+
180+
/* Slow path for possible multibyte characters */
181+
charlen = pg_encoding_mblen(encoding, cp);
182+
183+
if (remaining < charlen)
184+
{
185+
/*
186+
* If the character is longer than the available input,
187+
* replace the string with an invalid sequence. The invalid
188+
* sequence ensures that the escaped string will trigger an
189+
* error on the server-side, even if we can't directly report
190+
* an error here.
191+
*/
192+
enlargePQExpBuffer(id_return, 2);
193+
pg_encoding_set_invalid(encoding,
194+
id_return->data + id_return->len);
195+
id_return->len += 2;
196+
id_return->data[id_return->len] = '\0';
197+
198+
/* there's no more input data, so we can stop */
199+
break;
200+
}
201+
else if (pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
202+
{
203+
/*
204+
* Multibyte character is invalid. It's important to verify
205+
* that as invalid multi-byte characters could e.g. be used to
206+
* "skip" over quote characters, e.g. when parsing
207+
* character-by-character.
208+
*
209+
* Replace the bytes corresponding to the invalid character
210+
* with an invalid sequence, for the same reason as above.
211+
*
212+
* It would be a bit faster to verify the whole string the
213+
* first time we encounter a set highbit, but this way we can
214+
* replace just the invalid characters, which probably makes
215+
* it easier for users to find the invalidly encoded portion
216+
* of a larger string.
217+
*/
218+
enlargePQExpBuffer(id_return, 2);
219+
pg_encoding_set_invalid(encoding,
220+
id_return->data + id_return->len);
221+
id_return->len += 2;
222+
id_return->data[id_return->len] = '\0';
223+
224+
/*
225+
* Copy the rest of the string after the invalid multi-byte
226+
* character.
227+
*/
228+
remaining -= charlen;
229+
cp += charlen;
230+
}
231+
else
232+
{
233+
for (int i = 0; i < charlen; i++)
234+
{
235+
appendPQExpBufferChar(id_return, *cp);
236+
remaining--;
237+
cp++;
238+
}
239+
}
166240
}
241+
167242
appendPQExpBufferChar(id_return, '"');
168243
}
169244

@@ -290,17 +365,18 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
290365
size_t length = strlen(str);
291366
const char *source = str;
292367
char *target;
368+
size_t remaining = length;
293369

294370
if (!enlargePQExpBuffer(buf, 2 * length + 2))
295371
return;
296372

297373
target = buf->data + buf->len;
298374
*target++ = '\'';
299375

300-
while (*source != '\0')
376+
while (remaining > 0)
301377
{
302378
char c = *source;
303-
int len;
379+
int charlen;
304380
int i;
305381

306382
/* Fast path for plain ASCII */
@@ -312,39 +388,65 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
312388
/* Copy the character */
313389
*target++ = c;
314390
source++;
391+
remaining--;
315392
continue;
316393
}
317394

318395
/* Slow path for possible multibyte characters */
319-
len = PQmblen(source, encoding);
396+
charlen = PQmblen(source, encoding);
320397

321-
/* Copy the character */
322-
for (i = 0; i < len; i++)
398+
if (remaining < charlen)
323399
{
324-
if (*source == '\0')
325-
break;
326-
*target++ = *source++;
327-
}
400+
/*
401+
* If the character is longer than the available input, replace
402+
* the string with an invalid sequence. The invalid sequence
403+
* ensures that the escaped string will trigger an error on the
404+
* server-side, even if we can't directly report an error here.
405+
*
406+
* We know there's enough space for the invalid sequence because
407+
* the "target" buffer is 2 * length + 2 long, and at worst we're
408+
* replacing a single input byte with two invalid bytes.
409+
*/
410+
pg_encoding_set_invalid(encoding, target);
411+
target += 2;
328412

329-
/*
330-
* If we hit premature end of string (ie, incomplete multibyte
331-
* character), try to pad out to the correct length with spaces. We
332-
* may not be able to pad completely, but we will always be able to
333-
* insert at least one pad space (since we'd not have quoted a
334-
* multibyte character). This should be enough to make a string that
335-
* the server will error out on.
336-
*/
337-
if (i < len)
413+
/* there's no more valid input data, so we can stop */
414+
break;
415+
}
416+
else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1)
338417
{
339-
char *stop = buf->data + buf->maxlen - 2;
418+
/*
419+
* Multibyte character is invalid. It's important to verify that
420+
* as invalid multi-byte characters could e.g. be used to "skip"
421+
* over quote characters, e.g. when parsing
422+
* character-by-character.
423+
*
424+
* Replace the bytes corresponding to the invalid character with
425+
* an invalid sequence, for the same reason as above.
426+
*
427+
* It would be a bit faster to verify the whole string the first
428+
* time we encounter a set highbit, but this way we can replace
429+
* just the invalid characters, which probably makes it easier for
430+
* users to find the invalidly encoded portion of a larger string.
431+
*/
432+
pg_encoding_set_invalid(encoding, target);
433+
target += 2;
434+
remaining -= charlen;
340435

341-
for (; i < len; i++)
436+
/*
437+
* Copy the rest of the string after the invalid multi-byte
438+
* character.
439+
*/
440+
source += charlen;
441+
}
442+
else
443+
{
444+
/* Copy the character */
445+
for (i = 0; i < charlen; i++)
342446
{
343-
if (target >= stop)
344-
break;
345-
*target++ = ' ';
447+
*target++ = *source++;
448+
remaining--;
346449
}
347-
break;
348450
}
349451
}
350452

0 commit comments

Comments
 (0)