Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit be92ad4

Browse files
committed
Change case-folding of keywords to conform to SQL99 and fix misbehavior in the Turkish locale.
Keywords are now checked under pure ASCII case-folding rules ('A'-'Z' -> 'a'-'z' and nothing else). However, once a word is determined not to be a keyword, it will be case-folded under the current locale, same as before. See pghackers discussion 20-Feb-01.
1 parent 496373e commit be92ad4

File tree

6 files changed

+230
-107
lines changed

6 files changed

+230
-107
lines changed

src/backend/parser/keywords.c

+53-10
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,22 @@
11
/*-------------------------------------------------------------------------
22
*
33
* keywords.c
4-
* lexical token lookup for reserved words in postgres SQL
4+
* lexical token lookup for reserved words in PostgreSQL
55
*
66
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.88 2001/01/24 19:43:01 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.89 2001/02/21 18:53:46 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
15-
#include <ctype.h>
16-
1715
#include "postgres.h"
1816

17+
#include <ctype.h>
18+
1919
#include "nodes/parsenodes.h"
20-
#include "nodes/pg_list.h"
2120
#include "parser/keywords.h"
2221
#include "parser/parse.h"
2322

@@ -286,18 +285,62 @@ static ScanKeyword ScanKeywords[] = {
286285
{"zone", ZONE},
287286
};
288287

288+
/*
289+
* ScanKeywordLookup - see if a given word is a keyword
290+
*
291+
* Returns a pointer to the ScanKeyword table entry, or NULL if no match.
292+
*
293+
* The match is done case-insensitively. Note that we deliberately use a
294+
* dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
295+
* even if we are in a locale where tolower() would produce more or different
296+
* translations. This is to conform to the SQL99 spec, which says that
297+
* keywords are to be matched in this way even though non-keyword identifiers
298+
* receive a different case-normalization mapping.
299+
*/
289300
ScanKeyword *
290301
ScanKeywordLookup(char *text)
291302
{
292-
ScanKeyword *low = &ScanKeywords[0];
293-
ScanKeyword *high = endof(ScanKeywords) - 1;
294-
ScanKeyword *middle;
295-
int difference;
303+
int len,
304+
i;
305+
char word[NAMEDATALEN];
306+
ScanKeyword *low;
307+
ScanKeyword *high;
308+
309+
len = strlen(text);
310+
/* We assume all keywords are shorter than NAMEDATALEN. */
311+
if (len >= NAMEDATALEN)
312+
return NULL;
313+
314+
/*
315+
* Apply an ASCII-only downcasing. We must not use tolower() since
316+
* it may produce the wrong translation in some locales (eg, Turkish),
317+
* and we don't trust isupper() very much either. In an ASCII-based
318+
* encoding the tests against A and Z are sufficient, but we also check
319+
* isupper() so that we will work correctly under EBCDIC. The actual
320+
* case conversion step should work for either ASCII or EBCDIC.
321+
*/
322+
for (i = 0; i < len; i++)
323+
{
324+
char ch = text[i];
296325

326+
if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch))
327+
ch += 'a' - 'A';
328+
word[i] = ch;
329+
}
330+
word[len] = '\0';
331+
332+
/*
333+
* Now do a binary search using plain strcmp() comparison.
334+
*/
335+
low = &ScanKeywords[0];
336+
high = endof(ScanKeywords) - 1;
297337
while (low <= high)
298338
{
339+
ScanKeyword *middle;
340+
int difference;
341+
299342
middle = low + (high - low) / 2;
300-
difference = strcmp(middle->name, text);
343+
difference = strcmp(middle->name, word);
301344
if (difference == 0)
302345
return middle;
303346
else if (difference < 0)

src/backend/parser/scan.l

+22-14
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
/*-------------------------------------------------------------------------
33
*
44
* scan.l
5-
* lexical scanner for POSTGRES
5+
* lexical scanner for PostgreSQL
66
*
77
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
*
1111
* IDENTIFICATION
12-
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.86 2001/02/03 20:13:05 petere Exp $
12+
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.87 2001/02/21 18:53:47 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -477,12 +477,27 @@ other .
477477

478478

479479
{identifier} {
480-
int i;
481-
ScanKeyword *keyword;
480+
ScanKeyword *keyword;
481+
int i;
482482

483-
for(i = 0; yytext[i]; i++)
483+
/* Is it a keyword? */
484+
keyword = ScanKeywordLookup((char*) yytext);
485+
if (keyword != NULL)
486+
return keyword->value;
487+
488+
/*
489+
* No. Convert the identifier to lower case, and truncate
490+
* if necessary.
491+
*
492+
* Note: here we use a locale-dependent case conversion,
493+
* which seems appropriate under SQL99 rules, whereas
494+
* the keyword comparison was NOT locale-dependent.
495+
*/
496+
for (i = 0; yytext[i]; i++)
497+
{
484498
if (isupper((unsigned char) yytext[i]))
485499
yytext[i] = tolower((unsigned char) yytext[i]);
500+
}
486501
if (i >= NAMEDATALEN)
487502
{
488503
#ifdef MULTIBYTE
@@ -497,15 +512,8 @@ other .
497512
yytext[NAMEDATALEN-1] = '\0';
498513
#endif
499514
}
500-
keyword = ScanKeywordLookup((char*)yytext);
501-
if (keyword != NULL) {
502-
return keyword->value;
503-
}
504-
else
505-
{
506-
yylval.str = pstrdup((char*)yytext);
507-
return IDENT;
508-
}
515+
yylval.str = pstrdup((char*) yytext);
516+
return IDENT;
509517
}
510518

511519
{other} { return yytext[0]; }

src/backend/utils/adt/ruleutils.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* back to source text
44
*
55
* IDENTIFICATION
6-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.72 2001/02/14 21:35:05 tgl Exp $
6+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.73 2001/02/21 18:53:47 tgl Exp $
77
*
88
* This software is copyrighted by Jan Wieck - Hamburg.
99
*
@@ -2563,8 +2563,8 @@ quote_identifier(char *ident)
25632563
* but the parser doesn't provide any easy way to test for whether
25642564
* an identifier is safe or not... so be safe not sorry.
25652565
*
2566-
* Note: ScanKeywordLookup() expects an all-lower-case input, but
2567-
* we've already checked we have that.
2566+
* Note: ScanKeywordLookup() does case-insensitive comparison,
2567+
* but that's fine, since we already know we have all-lower-case.
25682568
*/
25692569
if (ScanKeywordLookup(ident) != NULL)
25702570
safe = false;

src/interfaces/ecpg/preproc/ecpg_keywords.c

+54-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
/*-------------------------------------------------------------------------
22
*
3-
* keywords.c
3+
* ecpg_keywords.c
44
* lexical token lookup for reserved words in postgres embedded SQL
55
*
6+
* IDENTIFICATION
7+
* $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/ecpg_keywords.c,v 1.22 2001/02/21 18:53:47 tgl Exp $
8+
*
69
*-------------------------------------------------------------------------
710
*/
811
#include "postgres_fe.h"
@@ -12,6 +15,7 @@
1215
#include "extern.h"
1316
#include "preproc.h"
1417

18+
1519
/*
1620
* List of (keyword-name, keyword-token-value) pairs.
1721
*
@@ -73,18 +77,62 @@ static ScanKeyword ScanKeywords[] = {
7377
{"whenever", SQL_WHENEVER},
7478
};
7579

80+
/*
81+
* ScanECPGKeywordLookup - see if a given word is a keyword
82+
*
83+
* Returns a pointer to the ScanKeyword table entry, or NULL if no match.
84+
*
85+
* The match is done case-insensitively. Note that we deliberately use a
86+
* dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
87+
* even if we are in a locale where tolower() would produce more or different
88+
* translations. This is to conform to the SQL99 spec, which says that
89+
* keywords are to be matched in this way even though non-keyword identifiers
90+
* receive a different case-normalization mapping.
91+
*/
7692
ScanKeyword *
7793
ScanECPGKeywordLookup(char *text)
7894
{
79-
ScanKeyword *low = &ScanKeywords[0];
80-
ScanKeyword *high = endof(ScanKeywords) - 1;
81-
ScanKeyword *middle;
82-
int difference;
95+
int len,
96+
i;
97+
char word[NAMEDATALEN];
98+
ScanKeyword *low;
99+
ScanKeyword *high;
83100

101+
len = strlen(text);
102+
/* We assume all keywords are shorter than NAMEDATALEN. */
103+
if (len >= NAMEDATALEN)
104+
return NULL;
105+
106+
/*
107+
* Apply an ASCII-only downcasing. We must not use tolower() since
108+
* it may produce the wrong translation in some locales (eg, Turkish),
109+
* and we don't trust isupper() very much either. In an ASCII-based
110+
* encoding the tests against A and Z are sufficient, but we also check
111+
* isupper() so that we will work correctly under EBCDIC. The actual
112+
* case conversion step should work for either ASCII or EBCDIC.
113+
*/
114+
for (i = 0; i < len; i++)
115+
{
116+
char ch = text[i];
117+
118+
if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch))
119+
ch += 'a' - 'A';
120+
word[i] = ch;
121+
}
122+
word[len] = '\0';
123+
124+
/*
125+
* Now do a binary search using plain strcmp() comparison.
126+
*/
127+
low = &ScanKeywords[0];
128+
high = endof(ScanKeywords) - 1;
84129
while (low <= high)
85130
{
131+
ScanKeyword *middle;
132+
int difference;
133+
86134
middle = low + (high - low) / 2;
87-
difference = strcmp(middle->name, text);
135+
difference = strcmp(middle->name, word);
88136
if (difference == 0)
89137
return middle;
90138
else if (difference < 0)

0 commit comments

Comments
 (0)