@@ -104,6 +104,7 @@ fmtIdEnc(const char *rawid, int encoding)
104
104
105
105
const char * cp ;
106
106
bool need_quotes = false;
107
+ size_t remaining = strlen (rawid );
107
108
108
109
/*
109
110
* These checks need to match the identifier production in scan.l. Don't
@@ -117,7 +118,8 @@ fmtIdEnc(const char *rawid, int encoding)
117
118
else
118
119
{
119
120
/* otherwise check the entire string */
120
- for (cp = rawid ; * cp ; cp ++ )
121
+ cp = rawid ;
122
+ for (size_t i = 0 ; i < remaining ; i ++ , cp ++ )
121
123
{
122
124
if (!((* cp >= 'a' && * cp <= 'z' )
123
125
|| (* cp >= '0' && * cp <= '9' )
@@ -153,17 +155,90 @@ fmtIdEnc(const char *rawid, int encoding)
153
155
else
154
156
{
155
157
appendPQExpBufferChar (id_return , '"' );
156
- for (cp = rawid ; * cp ; cp ++ )
158
+
159
+ cp = & rawid [0 ];
160
+ while (remaining > 0 )
157
161
{
158
- /*
159
- * Did we find a double-quote in the string? Then make this a
160
- * double double-quote per SQL99. Before, we put in a
161
- * backslash/double-quote pair. - thomas 2000-08-05
162
- */
163
- if (* cp == '"' )
164
- appendPQExpBufferChar (id_return , '"' );
165
- appendPQExpBufferChar (id_return , * cp );
162
+ int charlen ;
163
+
164
+ /* Fast path for plain ASCII */
165
+ if (!IS_HIGHBIT_SET (* cp ))
166
+ {
167
+ /*
168
+ * Did we find a double-quote in the string? Then make this a
169
+ * double double-quote per SQL99. Before, we put in a
170
+ * backslash/double-quote pair. - thomas 2000-08-05
171
+ */
172
+ if (* cp == '"' )
173
+ appendPQExpBufferChar (id_return , '"' );
174
+ appendPQExpBufferChar (id_return , * cp );
175
+ remaining -- ;
176
+ cp ++ ;
177
+ continue ;
178
+ }
179
+
180
+ /* Slow path for possible multibyte characters */
181
+ charlen = pg_encoding_mblen (encoding , cp );
182
+
183
+ if (remaining < charlen )
184
+ {
185
+ /*
186
+ * If the character is longer than the available input,
187
+ * replace the string with an invalid sequence. The invalid
188
+ * sequence ensures that the escaped string will trigger an
189
+ * error on the server-side, even if we can't directly report
190
+ * an error here.
191
+ */
192
+ enlargePQExpBuffer (id_return , 2 );
193
+ pg_encoding_set_invalid (encoding ,
194
+ id_return -> data + id_return -> len );
195
+ id_return -> len += 2 ;
196
+ id_return -> data [id_return -> len ] = '\0' ;
197
+
198
+ /* there's no more input data, so we can stop */
199
+ break ;
200
+ }
201
+ else if (pg_encoding_verifymbchar (encoding , cp , charlen ) == -1 )
202
+ {
203
+ /*
204
+ * Multibyte character is invalid. It's important to verify
205
+ * that as invalid multi-byte characters could e.g. be used to
206
+ * "skip" over quote characters, e.g. when parsing
207
+ * character-by-character.
208
+ *
209
+ * Replace the bytes corresponding to the invalid character
210
+ * with an invalid sequence, for the same reason as above.
211
+ *
212
+ * It would be a bit faster to verify the whole string the
213
+ * first time we encounter a set highbit, but this way we can
214
+ * replace just the invalid characters, which probably makes
215
+ * it easier for users to find the invalidly encoded portion
216
+ * of a larger string.
217
+ */
218
+ enlargePQExpBuffer (id_return , 2 );
219
+ pg_encoding_set_invalid (encoding ,
220
+ id_return -> data + id_return -> len );
221
+ id_return -> len += 2 ;
222
+ id_return -> data [id_return -> len ] = '\0' ;
223
+
224
+ /*
225
+ * Copy the rest of the string after the invalid multi-byte
226
+ * character.
227
+ */
228
+ remaining -= charlen ;
229
+ cp += charlen ;
230
+ }
231
+ else
232
+ {
233
+ for (int i = 0 ; i < charlen ; i ++ )
234
+ {
235
+ appendPQExpBufferChar (id_return , * cp );
236
+ remaining -- ;
237
+ cp ++ ;
238
+ }
239
+ }
166
240
}
241
+
167
242
appendPQExpBufferChar (id_return , '"' );
168
243
}
169
244
@@ -290,17 +365,18 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
290
365
size_t length = strlen (str );
291
366
const char * source = str ;
292
367
char * target ;
368
+ size_t remaining = length ;
293
369
294
370
if (!enlargePQExpBuffer (buf , 2 * length + 2 ))
295
371
return ;
296
372
297
373
target = buf -> data + buf -> len ;
298
374
* target ++ = '\'' ;
299
375
300
- while (* source != '\0' )
376
+ while (remaining > 0 )
301
377
{
302
378
char c = * source ;
303
- int len ;
379
+ int charlen ;
304
380
int i ;
305
381
306
382
/* Fast path for plain ASCII */
@@ -312,39 +388,65 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
312
388
/* Copy the character */
313
389
* target ++ = c ;
314
390
source ++ ;
391
+ remaining -- ;
315
392
continue ;
316
393
}
317
394
318
395
/* Slow path for possible multibyte characters */
319
- len = PQmblen (source , encoding );
396
+ charlen = PQmblen (source , encoding );
320
397
321
- /* Copy the character */
322
- for (i = 0 ; i < len ; i ++ )
398
+ if (remaining < charlen )
323
399
{
324
- if (* source == '\0' )
325
- break ;
326
- * target ++ = * source ++ ;
327
- }
400
+ /*
401
+ * If the character is longer than the available input, replace
402
+ * the string with an invalid sequence. The invalid sequence
403
+ * ensures that the escaped string will trigger an error on the
404
+ * server-side, even if we can't directly report an error here.
405
+ *
406
+ * We know there's enough space for the invalid sequence because
407
+ * the "target" buffer is 2 * length + 2 long, and at worst we're
408
+ * replacing a single input byte with two invalid bytes.
409
+ */
410
+ pg_encoding_set_invalid (encoding , target );
411
+ target += 2 ;
328
412
329
- /*
330
- * If we hit premature end of string (ie, incomplete multibyte
331
- * character), try to pad out to the correct length with spaces. We
332
- * may not be able to pad completely, but we will always be able to
333
- * insert at least one pad space (since we'd not have quoted a
334
- * multibyte character). This should be enough to make a string that
335
- * the server will error out on.
336
- */
337
- if (i < len )
413
+ /* there's no more valid input data, so we can stop */
414
+ break ;
415
+ }
416
+ else if (pg_encoding_verifymbchar (encoding , source , charlen ) == -1 )
338
417
{
339
- char * stop = buf -> data + buf -> maxlen - 2 ;
418
+ /*
419
+ * Multibyte character is invalid. It's important to verify that
420
+ * as invalid multi-byte characters could e.g. be used to "skip"
421
+ * over quote characters, e.g. when parsing
422
+ * character-by-character.
423
+ *
424
+ * Replace the bytes corresponding to the invalid character with
425
+ * an invalid sequence, for the same reason as above.
426
+ *
427
+ * It would be a bit faster to verify the whole string the first
428
+ * time we encounter a set highbit, but this way we can replace
429
+ * just the invalid characters, which probably makes it easier for
430
+ * users to find the invalidly encoded portion of a larger string.
431
+ */
432
+ pg_encoding_set_invalid (encoding , target );
433
+ target += 2 ;
434
+ remaining -= charlen ;
340
435
341
- for (; i < len ; i ++ )
436
+ /*
437
+ * Copy the rest of the string after the invalid multi-byte
438
+ * character.
439
+ */
440
+ source += charlen ;
441
+ }
442
+ else
443
+ {
444
+ /* Copy the character */
445
+ for (i = 0 ; i < charlen ; i ++ )
342
446
{
343
- if (target >= stop )
344
- break ;
345
- * target ++ = ' ' ;
447
+ * target ++ = * source ++ ;
448
+ remaining -- ;
346
449
}
347
- break ;
348
450
}
349
451
}
350
452
0 commit comments