Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 97b6144

Browse files
committed
Make postgres.bki use the same literal-string syntax as postgresql.conf.
The BKI file's string quoting conventions were previously quite weird, perhaps as a result of repurposing a function built to scan single-quoted strings to scan double-quoted ones. Change to use the same rules as we use in GUC files, allowing some simplifications in genbki.pl and initdb.c.

While at it, completely remove the backend's scanstr() function, which was essentially a duplicate of the string dequoting code in guc-file.l. Instead export that one (under a less generic name than it had) and let bootscanner.l use it. Now we can clarify that scansup.c exists only to support the main lexer. We could alternatively have removed GUC_scanstr, but this way seems better since the previous arrangement could mislead a reader into thinking that scanstr() had something to do with the main lexer's handling of string literals. Maybe it did once, but if so it was a long time ago.

This patch does not bump catversion, since the initially-installed catalog contents don't change. Note however that successful initdb after applying this patch will require up-to-date postgres.bki as well as postgres and initdb executables.

In passing, remove a bunch of very-long-obsolete #include's in bootparse.y and bootscanner.l.

John Naylor

Discussion: https://postgr.es/m/CACPNZCtDpd18T0KATTmCggO2GdVC4ow86ypiq5ENff1VnauL8g@mail.gmail.com
1 parent 9081bdd commit 97b6144

File tree

9 files changed

+31
-182
lines changed

9 files changed

+31
-182
lines changed

doc/src/sgml/bki.sgml

+6-4
Original file line numberDiff line numberDiff line change
@@ -752,8 +752,8 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
752752
next token that syntactically cannot belong to the preceding
753753
command starts a new one. (Usually you would put a new command on
754754
a new line, for clarity.) Tokens can be certain key words, special
755-
characters (parentheses, commas, etc.), numbers, or double-quoted
756-
strings. Everything is case sensitive.
755+
characters (parentheses, commas, etc.), identifiers, numbers, or
756+
single-quoted strings. Everything is case sensitive.
757757
</para>
758758

759759
<para>
@@ -876,7 +876,9 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
876876
<para>
877877
NULL values can be specified using the special key word
878878
<literal>_null_</literal>. Values that do not look like
879-
identifiers or digit strings must be double quoted.
879+
identifiers or digit strings must be single-quoted.
880+
(To include a single quote in a value, write it twice.
881+
Escape-string-style backslash escapes are allowed in the string, too.)
880882
</para>
881883
</listitem>
882884
</varlistentry>
@@ -1046,7 +1048,7 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
10461048
<programlisting>
10471049
create test_table 420 (oid = oid, cola = int4, colb = text)
10481050
open test_table
1049-
insert ( 421 1 "value1" )
1051+
insert ( 421 1 'value 1' )
10501052
insert ( 422 2 _null_ )
10511053
close test_table
10521054
</programlisting>

src/backend/bootstrap/bootparse.y

-19
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,10 @@
1818

1919
#include <unistd.h>
2020

21-
#include "access/attnum.h"
22-
#include "access/htup.h"
23-
#include "access/itup.h"
24-
#include "access/tupdesc.h"
2521
#include "bootstrap/bootstrap.h"
26-
#include "catalog/catalog.h"
2722
#include "catalog/heap.h"
2823
#include "catalog/namespace.h"
2924
#include "catalog/pg_am.h"
30-
#include "catalog/pg_attribute.h"
3125
#include "catalog/pg_authid.h"
3226
#include "catalog/pg_class.h"
3327
#include "catalog/pg_namespace.h"
@@ -36,20 +30,7 @@
3630
#include "commands/defrem.h"
3731
#include "miscadmin.h"
3832
#include "nodes/makefuncs.h"
39-
#include "nodes/nodes.h"
40-
#include "nodes/parsenodes.h"
41-
#include "nodes/pg_list.h"
42-
#include "nodes/primnodes.h"
43-
#include "rewrite/prs2lock.h"
44-
#include "storage/block.h"
45-
#include "storage/fd.h"
46-
#include "storage/ipc.h"
47-
#include "storage/itemptr.h"
48-
#include "storage/off.h"
49-
#include "storage/smgr.h"
50-
#include "tcop/dest.h"
5133
#include "utils/memutils.h"
52-
#include "utils/rel.h"
5334

5435

5536
/*

src/backend/bootstrap/bootscanner.l

+5-24
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,8 @@
1515
*/
1616
#include "postgres.h"
1717

18-
#include "access/attnum.h"
19-
#include "access/htup.h"
20-
#include "access/itup.h"
21-
#include "access/tupdesc.h"
2218
#include "bootstrap/bootstrap.h"
23-
#include "catalog/pg_am.h"
24-
#include "catalog/pg_attribute.h"
25-
#include "catalog/pg_class.h"
26-
#include "nodes/nodes.h"
27-
#include "nodes/parsenodes.h"
28-
#include "nodes/pg_list.h"
29-
#include "nodes/primnodes.h"
30-
#include "parser/scansup.h"
31-
#include "rewrite/prs2lock.h"
32-
#include "storage/block.h"
33-
#include "storage/fd.h"
34-
#include "storage/itemptr.h"
35-
#include "storage/off.h"
36-
#include "utils/rel.h"
19+
#include "utils/guc.h"
3720

3821
/* Not needed now that this file is compiled as part of bootparse. */
3922
/* #include "bootparse.h" */
@@ -66,7 +49,7 @@ static int yyline = 1; /* line number for error reporting */
6649

6750

6851
id [-A-Za-z0-9_]+
69-
sid \"([^\"])*\"
52+
sid \'([^']|\'\')*\'
7053

7154
/*
7255
* Keyword tokens return the keyword text (as a constant string) in yylval.kw,
@@ -120,14 +103,12 @@ NOT { yylval.kw = "NOT"; return XNOT; }
120103
NULL { yylval.kw = "NULL"; return XNULL; }
121104

122105
{id} {
123-
yylval.str = scanstr(yytext);
106+
yylval.str = pstrdup(yytext);
124107
return ID;
125108
}
126109
{sid} {
127-
/* leading and trailing quotes are not passed to scanstr */
128-
yytext[strlen(yytext) - 1] = '\0';
129-
yylval.str = scanstr(yytext+1);
130-
yytext[strlen(yytext)] = '"'; /* restore yytext */
110+
/* strip quotes and escapes */
111+
yylval.str = DeescapeQuotedString(yytext);
131112
return ID;
132113
}
133114

src/backend/catalog/genbki.pl

+2-4
Original file line numberDiff line numberDiff line change
@@ -845,17 +845,15 @@ sub print_bki_insert
845845
# since that represents a NUL char in C code.
846846
$bki_value = '' if $bki_value eq '\0';
847847

848-
# Handle single quotes by doubling them, and double quotes by
849-
# converting them to octal escapes, because that's what the
848+
# Handle single quotes by doubling them, because that's what the
850849
# bootstrap scanner requires. We do not process backslashes
851850
# specially; this allows escape-string-style backslash escapes
852851
# to be used in catalog data.
853852
$bki_value =~ s/'/''/g;
854-
$bki_value =~ s/"/\\042/g;
855853

856854
# Quote value if needed. We need not quote values that satisfy
857855
# the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+".
858-
$bki_value = sprintf(qq'"%s"', $bki_value)
856+
$bki_value = sprintf("'%s'", $bki_value)
859857
if length($bki_value) == 0
860858
or $bki_value =~ /[^-A-Za-z0-9_]/;
861859

src/backend/parser/scansup.c

+1-94
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/*-------------------------------------------------------------------------
22
*
33
* scansup.c
4-
* support routines for the lex/flex scanner, used by both the normal
5-
* backend as well as the bootstrap backend
4+
* scanner support routines used by the core lexer
65
*
76
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
87
* Portions Copyright (c) 1994, Regents of the University of California
@@ -20,98 +19,6 @@
2019
#include "mb/pg_wchar.h"
2120
#include "parser/scansup.h"
2221

23-
/* ----------------
24-
* scanstr
25-
*
26-
* if the string passed in has escaped codes, map the escape codes to actual
27-
* chars
28-
*
29-
* the string returned is palloc'd and should eventually be pfree'd by the
30-
* caller!
31-
* ----------------
32-
*/
33-
34-
char *
35-
scanstr(const char *s)
36-
{
37-
char *newStr;
38-
int len,
39-
i,
40-
j;
41-
42-
if (s == NULL || s[0] == '\0')
43-
return pstrdup("");
44-
45-
len = strlen(s);
46-
47-
newStr = palloc(len + 1); /* string cannot get longer */
48-
49-
for (i = 0, j = 0; i < len; i++)
50-
{
51-
if (s[i] == '\'')
52-
{
53-
/*
54-
* Note: if scanner is working right, unescaped quotes can only
55-
* appear in pairs, so there should be another character.
56-
*/
57-
i++;
58-
/* The bootstrap parser is not as smart, so check here. */
59-
Assert(s[i] == '\'');
60-
newStr[j] = s[i];
61-
}
62-
else if (s[i] == '\\')
63-
{
64-
i++;
65-
switch (s[i])
66-
{
67-
case 'b':
68-
newStr[j] = '\b';
69-
break;
70-
case 'f':
71-
newStr[j] = '\f';
72-
break;
73-
case 'n':
74-
newStr[j] = '\n';
75-
break;
76-
case 'r':
77-
newStr[j] = '\r';
78-
break;
79-
case 't':
80-
newStr[j] = '\t';
81-
break;
82-
case '0':
83-
case '1':
84-
case '2':
85-
case '3':
86-
case '4':
87-
case '5':
88-
case '6':
89-
case '7':
90-
{
91-
int k;
92-
long octVal = 0;
93-
94-
for (k = 0;
95-
s[i + k] >= '0' && s[i + k] <= '7' && k < 3;
96-
k++)
97-
octVal = (octVal << 3) + (s[i + k] - '0');
98-
i += k - 1;
99-
newStr[j] = ((char) octVal);
100-
}
101-
break;
102-
default:
103-
newStr[j] = s[i];
104-
break;
105-
} /* switch */
106-
} /* s[i] == '\\' */
107-
else
108-
newStr[j] = s[i];
109-
j++;
110-
}
111-
newStr[j] = '\0';
112-
return newStr;
113-
}
114-
11522

11623
/*
11724
* downcase_truncate_identifier() --- do appropriate downcasing and

src/backend/utils/misc/guc-file.l

+8-6
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ static void record_config_file_error(const char *errmsg,
5555
ConfigVariable **tail_p);
5656

5757
static int GUC_flex_fatal(const char *msg);
58-
static char *GUC_scanstr(const char *s);
5958

6059
/* LCOV_EXCL_START */
6160

@@ -797,7 +796,7 @@ ParseConfigFp(FILE *fp, const char *config_file, int depth, int elevel,
797796
token != GUC_UNQUOTED_STRING)
798797
goto parse_error;
799798
if (token == GUC_STRING) /* strip quotes and escapes */
800-
opt_value = GUC_scanstr(yytext);
799+
opt_value = DeescapeQuotedString(yytext);
801800
else
802801
opt_value = pstrdup(yytext);
803802

@@ -1132,22 +1131,25 @@ FreeConfigVariable(ConfigVariable *item)
11321131

11331132

11341133
/*
1135-
* scanstr
1134+
* DeescapeQuotedString
11361135
*
11371136
* Strip the quotes surrounding the given string, and collapse any embedded
11381137
* '' sequences and backslash escapes.
11391138
*
1140-
* the string returned is palloc'd and should eventually be pfree'd by the
1139+
* The string returned is palloc'd and should eventually be pfree'd by the
11411140
* caller.
1141+
*
1142+
* This is exported because it is also used by the bootstrap scanner.
11421143
*/
1143-
static char *
1144-
GUC_scanstr(const char *s)
1144+
char *
1145+
DeescapeQuotedString(const char *s)
11451146
{
11461147
char *newStr;
11471148
int len,
11481149
i,
11491150
j;
11501151

1152+
/* We just Assert that there are leading and trailing quotes */
11511153
Assert(s != NULL && s[0] == '\'');
11521154
len = strlen(s);
11531155
Assert(len >= 2);

src/bin/initdb/initdb.c

+7-27
Original file line numberDiff line numberDiff line change
@@ -331,12 +331,9 @@ escape_quotes(const char *src)
331331

332332
/*
333333
* Escape a field value to be inserted into the BKI data.
334-
* Here, we first run the value through escape_quotes (which
335-
* will be inverted by the backend's scanstr() function) and
336-
* then overlay special processing of double quotes, which
337-
* bootscanner.l will only accept as data if converted to octal
338-
* representation ("\042"). We always wrap the value in double
339-
* quotes, even if that isn't strictly necessary.
334+
* Run the value through escape_quotes (which will be inverted
335+
* by the backend's DeescapeQuotedString() function), then wrap
336+
* the value in single quotes, even if that isn't strictly necessary.
340337
*/
341338
static char *
342339
escape_quotes_bki(const char *src)
@@ -345,30 +342,13 @@ escape_quotes_bki(const char *src)
345342
char *data = escape_quotes(src);
346343
char *resultp;
347344
char *datap;
348-
int nquotes = 0;
349345

350-
/* count double quotes in data */
351-
datap = data;
352-
while ((datap = strchr(datap, '"')) != NULL)
353-
{
354-
nquotes++;
355-
datap++;
356-
}
357-
358-
result = (char *) pg_malloc(strlen(data) + 3 + nquotes * 3);
346+
result = (char *) pg_malloc(strlen(data) + 3);
359347
resultp = result;
360-
*resultp++ = '"';
348+
*resultp++ = '\'';
361349
for (datap = data; *datap; datap++)
362-
{
363-
if (*datap == '"')
364-
{
365-
strcpy(resultp, "\\042");
366-
resultp += 4;
367-
}
368-
else
369-
*resultp++ = *datap;
370-
}
371-
*resultp++ = '"';
350+
*resultp++ = *datap;
351+
*resultp++ = '\'';
372352
*resultp = '\0';
373353

374354
free(data);

src/include/parser/scansup.h

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/*-------------------------------------------------------------------------
22
*
33
* scansup.h
4-
* scanner support routines. used by both the bootstrap lexer
5-
* as well as the normal lexer
4+
* scanner support routines used by the core lexer
65
*
76
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
87
* Portions Copyright (c) 1994, Regents of the University of California
@@ -15,8 +14,6 @@
1514
#ifndef SCANSUP_H
1615
#define SCANSUP_H
1716

18-
extern char *scanstr(const char *s);
19-
2017
extern char *downcase_truncate_identifier(const char *ident, int len,
2118
bool warn);
2219

src/include/utils/guc.h

+1
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ extern bool ParseConfigDirectory(const char *includedir,
155155
ConfigVariable **head_p,
156156
ConfigVariable **tail_p);
157157
extern void FreeConfigVariables(ConfigVariable *list);
158+
extern char *DeescapeQuotedString(const char *s);
158159

159160
/*
160161
* The possible values of an enum variable are specified by an array of

0 commit comments

Comments
 (0)