Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 8e35bbd

Browse files
committed
Remove fixed-size literal buffer from scan.l, and repair
boundary-condition bug in myinput() which caused the flex scanner to fail on tokens larger than a bufferload. Turns out flex doesn't want null-terminated input ... and if it gives you a 1-character buffer, you'd better supply a character, not a null, lest you be thought to be reporting end of input.
1 parent d07766f commit 8e35bbd

File tree

2 files changed

+73
-62
lines changed

2 files changed

+73
-62
lines changed

src/backend/parser/Makefile

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Makefile for parser
55
#
66
# IDENTIFICATION
7-
# $Header: /cvsroot/pgsql/src/backend/parser/Makefile,v 1.20 1999/05/03 19:09:40 momjian Exp $
7+
# $Header: /cvsroot/pgsql/src/backend/parser/Makefile,v 1.21 1999/10/18 02:42:31 tgl Exp $
88
#
99
#-------------------------------------------------------------------------
1010

@@ -37,9 +37,7 @@ gram.c parse.h: gram.y
3737

3838
scan.c: scan.l
3939
$(LEX) $<
40-
sed -e 's/#define YY_BUF_SIZE .*/#define YY_BUF_SIZE 65536/' \
41-
<lex.yy.c >scan.c
42-
rm -f lex.yy.c
40+
mv lex.yy.c scan.c
4341

4442
# The following dependencies on parse.h are computed by
4543
# make depend, but we state them here explicitly anyway because

src/backend/parser/scan.l

Lines changed: 71 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.59 1999/10/09 01:32:38 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.60 1999/10/18 02:42:31 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -20,6 +20,7 @@
2020
#include <errno.h>
2121

2222
#include "postgres.h"
23+
2324
#include "miscadmin.h"
2425
#include "nodes/parsenodes.h"
2526
#include "nodes/pg_list.h"
@@ -29,16 +30,6 @@
2930
#include "parser/scansup.h"
3031
#include "utils/builtins.h"
3132

32-
#ifdef YY_READ_BUF_SIZE
33-
#undef YY_READ_BUF_SIZE
34-
#endif
35-
#define YY_READ_BUF_SIZE MAX_PARSE_BUFFER
36-
37-
#ifdef YY_READ_BUF_SIZE
38-
#undef YY_READ_BUF_SIZE
39-
#endif
40-
#define YY_READ_BUF_SIZE MAX_PARSE_BUFFER
41-
4233
extern char *parseString;
4334
static char *parseCh;
4435

@@ -47,9 +38,8 @@ static char *parseCh;
4738
#undef yywrap
4839
#endif /* yywrap */
4940

41+
/* set up my input handler --- need one flavor for flex, one for lex */
5042
#if defined(FLEX_SCANNER)
51-
/* MAX_PARSE_BUFFER is defined in miscadmin.h */
52-
#define YYLMAX MAX_PARSE_BUFFER
5343
#define YY_NO_UNPUT
5444
static int myinput(char* buf, int max);
5545
#undef YY_INPUT
@@ -63,8 +53,18 @@ void unput(char);
6353

6454
extern YYSTYPE yylval;
6555

66-
int llen;
67-
char literal[MAX_PARSE_BUFFER];
56+
/*
57+
* literalbuf is used to accumulate literal values when multiple rules
58+
* are needed to parse a single literal. Call startlit to reset buffer
59+
* to empty, addlit to add text. Note that the buffer is palloc'd and
60+
* starts life afresh on every parse cycle.
61+
*/
62+
static char *literalbuf; /* expandable buffer */
63+
static int literallen; /* actual current length */
64+
static int literalalloc; /* current allocated buffer size */
65+
66+
#define startlit() (literalbuf[0] = '\0', literallen = 0)
67+
static void addlit(char *ytext, int yleng);
6868

6969
%}
7070
/* OK, here is a short description of lex/flex rules behavior.
@@ -153,17 +153,14 @@ self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
153153
op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
154154
operator {op_and_self}+
155155

156-
/* we do not allow unary minus in numbers.
157-
* instead we pass it verbatim to parser. there it gets
156+
/* we no longer allow unary minus in numbers.
157+
* instead we pass it separately to parser. there it gets
158158
* coerced via doNegate() -- Leon aug 20 1999
159159
*/
160160

161161
integer {digit}+
162162
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
163163
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
164-
/*
165-
real (((({digit}*\.{digit}+)|({digit}+\.{digit}*))([Ee][-+]?{digit}+)?)|({digit}+[Ee][-+]?{digit}+))
166-
*/
167164

168165
param \${integer}
169166

@@ -199,88 +196,77 @@ other .
199196

200197
{xbstart} {
201198
BEGIN(xb);
202-
llen = 0;
203-
*literal = '\0';
199+
startlit();
204200
}
205201
<xb>{xbstop} {
206202
char* endptr;
207203

208204
BEGIN(INITIAL);
209205
errno = 0;
210-
yylval.ival = strtol((char *)literal,&endptr,2);
206+
yylval.ival = strtol(literalbuf, &endptr, 2);
211207
if (*endptr != '\0' || errno == ERANGE)
212-
elog(ERROR,"Bad binary integer input '%s'",literal);
208+
elog(ERROR, "Bad binary integer input '%s'",
209+
literalbuf);
213210
return ICONST;
214211
}
215212
<xh>{xhinside} |
216213
<xb>{xbinside} {
217-
if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
218-
elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
219-
memcpy(literal+llen, yytext, yyleng+1);
220-
llen += yyleng;
214+
addlit(yytext, yyleng);
221215
}
222216
<xh>{xhcat} |
223217
<xb>{xbcat} {
224218
}
225219

226220
{xhstart} {
227221
BEGIN(xh);
228-
llen = 0;
229-
*literal = '\0';
222+
startlit();
230223
}
231224
<xh>{xhstop} {
232225
char* endptr;
233226

234227
BEGIN(INITIAL);
235228
errno = 0;
236-
yylval.ival = strtol((char *)literal,&endptr,16);
229+
yylval.ival = strtol(literalbuf, &endptr, 16);
237230
if (*endptr != '\0' || errno == ERANGE)
238-
elog(ERROR,"Bad hexadecimal integer input '%s'",literal);
231+
elog(ERROR, "Bad hexadecimal integer input '%s'",
232+
literalbuf);
239233
return ICONST;
240234
}
241235

242236
{xqstart} {
243237
BEGIN(xq);
244-
llen = 0;
245-
*literal = '\0';
238+
startlit();
246239
}
247240
<xq>{xqstop} {
248241
BEGIN(INITIAL);
249-
yylval.str = scanstr(literal);
242+
yylval.str = scanstr(literalbuf);
250243
return SCONST;
251244
}
252245
<xq>{xqdouble} |
253246
<xq>{xqinside} |
254247
<xq>{xqliteral} {
255-
if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
256-
elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
257-
memcpy(literal+llen, yytext, yyleng+1);
258-
llen += yyleng;
248+
addlit(yytext, yyleng);
259249
}
260250
<xq>{xqcat} {
261251
}
262252

263253

264254
{xdstart} {
265255
BEGIN(xd);
266-
llen = 0;
267-
*literal = '\0';
256+
startlit();
268257
}
269258
<xd>{xdstop} {
270259
BEGIN(INITIAL);
271-
yylval.str = pstrdup(literal);
260+
yylval.str = pstrdup(literalbuf);
272261
return IDENT;
273262
}
274263
<xd>{xdinside} {
275-
if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
276-
elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
277-
memcpy(literal+llen, yytext, yyleng+1);
278-
llen += yyleng;
264+
addlit(yytext, yyleng);
279265
}
280266

281267
{typecast} { return TYPECAST; }
282268

283-
{self} { return yytext[0]; }
269+
{self} { return yytext[0]; }
284270

285271
{operator} {
286272
if (strcmp((char*)yytext,"!=") == 0)
@@ -391,14 +377,37 @@ init_io()
391377
because input()/myinput() checks the non-nullness of parseCh
392378
to know when to pass the string to lex/flex */
393379
parseCh = NULL;
380+
381+
/* initialize literal buffer to a reasonable but expansible size */
382+
literalalloc = 128;
383+
literalbuf = (char *) palloc(literalalloc);
384+
startlit();
385+
394386
#if defined(FLEX_SCANNER)
395387
if (YY_CURRENT_BUFFER)
396388
yy_flush_buffer(YY_CURRENT_BUFFER);
397389
#endif /* FLEX_SCANNER */
398390
BEGIN INITIAL;
399391
}
400392

393+
static void
394+
addlit(char *ytext, int yleng)
395+
{
396+
/* enlarge buffer if needed */
397+
if ((literallen+yleng) >= literalalloc)
398+
{
399+
do {
400+
literalalloc *= 2;
401+
} while ((literallen+yleng) >= literalalloc);
402+
literalbuf = (char *) repalloc(literalbuf, literalalloc);
403+
}
404+
/* append data --- note we assume ytext is null-terminated */
405+
memcpy(literalbuf+literallen, ytext, yleng+1);
406+
literallen += yleng;
407+
}
408+
401409
#if !defined(FLEX_SCANNER)
410+
402411
/* get lex input from a string instead of from stdin */
403412
int
404413
input()
@@ -420,27 +429,31 @@ unput(char c)
420429
else if (c != 0)
421430
*--parseCh = c;
422431
}
432+
423433
#endif /* !defined(FLEX_SCANNER) */
424434

425435
#ifdef FLEX_SCANNER
436+
426437
/* input routine for flex to read input from a string instead of a file */
427438
static int
428439
myinput(char* buf, int max)
429440
{
430-
int len, copylen;
441+
int len;
431442

432443
if (parseCh == NULL)
433444
parseCh = parseString;
434445
len = strlen(parseCh); /* remaining data available */
435-
if (len >= max)
436-
copylen = max - 1;
437-
else
438-
copylen = len;
439-
if (copylen > 0)
440-
memcpy(buf, parseCh, copylen);
441-
buf[copylen] = '\0';
442-
parseCh += copylen;
443-
return copylen;
446+
/* Note: this code used to think that flex wants a null-terminated
447+
* string. It does NOT, and returning 1 less character than it asks
448+
* for will cause failure under the right boundary conditions. So
449+
* shut up and fill the buffer to the limit, you hear?
450+
*/
451+
if (len > max)
452+
len = max;
453+
if (len > 0)
454+
memcpy(buf, parseCh, len);
455+
parseCh += len;
456+
return len;
444457
}
445-
#endif /* FLEX_SCANNER */
446458

459+
#endif /* FLEX_SCANNER */

0 commit comments

Comments
 (0)