Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 87289ff

Browse files
committed
Add regression tests for CSV mode and the \. end-of-data marker, and automatically
quote a \. value when dumping a single column, so that the dump loads properly. Also add documentation describing this issue.
1 parent 1b184c9 commit 87289ff

File tree

4 files changed

+61
-25
lines changed

4 files changed

+61
-25
lines changed

doc/src/sgml/ref/copy.sgml

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.70 2005/10/15 20:12:33 neilc Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.71 2005/12/28 03:25:32 momjian Exp $
33
PostgreSQL documentation
44
-->
55

@@ -511,17 +511,28 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
511511
comparisons for specific columns.
512512
</para>
513513

514+
<para>
515+
Because backslash is not a special character in the <literal>CSV</>
516+
format, <literal>\.</>, the end-of-data marker, could also appear
517+
as a data value. To avoid any misinterpretation, a <literal>\.</>
518+
data value appearing as a lone entry on a line is automatically
519+
quoted on output, and on input, if quoted, is not interpreted as the
520+
end-of-data marker. If you are loading a file created by another
521+
application that has a single unquoted column and might have a value
522+
of <literal>\.</>, you might need to quote that value in the input file.
523+
</para>
524+
514525
<note>
515-
<para>
516-
In <literal>CSV</> mode, all characters are significant. A quoted value
517-
surrounded by white space, or any characters other than
518-
<literal>DELIMITER</>, will include those characters. This can cause
519-
errors if you import data from a system that pads <literal>CSV</>
520-
lines with white space out to some fixed width. If such a situation
521-
arises you might need to preprocess the <literal>CSV</> file to remove
522-
the trailing white space, before importing the data into
523-
<productname>PostgreSQL</>.
524-
</para>
526+
<para>
527+
In <literal>CSV</> mode, all characters are significant. A quoted value
528+
surrounded by white space, or any characters other than
529+
<literal>DELIMITER</>, will include those characters. This can cause
530+
errors if you import data from a system that pads <literal>CSV</>
531+
lines with white space out to some fixed width. If such a situation
532+
arises you might need to preprocess the <literal>CSV</> file to remove
533+
the trailing white space, before importing the data into
534+
<productname>PostgreSQL</>.
535+
</para>
525536
</note>
526537

527538
<note>

src/backend/commands/copy.c

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.256 2005/12/27 18:10:48 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.257 2005/12/28 03:25:32 momjian Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -244,7 +244,7 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
244244
bool *isnull);
245245
static void CopyAttributeOutText(CopyState cstate, char *server_string);
246246
static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
247-
bool use_quote);
247+
bool use_quote, bool single_attr);
248248
static List *CopyGetAttnums(Relation rel, List *attnamelist);
249249
static char *limit_printout_length(const char *str);
250250

@@ -1284,7 +1284,8 @@ CopyTo(CopyState cstate)
12841284

12851285
colname = NameStr(attr[attnum - 1]->attname);
12861286

1287-
CopyAttributeOutCSV(cstate, colname, false);
1287+
CopyAttributeOutCSV(cstate, colname, false,
1288+
list_length(cstate->attnumlist) == 1);
12881289
}
12891290

12901291
CopySendEndOfRow(cstate);
@@ -1359,7 +1360,8 @@ CopyTo(CopyState cstate)
13591360
value));
13601361
if (cstate->csv_mode)
13611362
CopyAttributeOutCSV(cstate, string,
1362-
force_quote[attnum - 1]);
1363+
force_quote[attnum - 1],
1364+
list_length(cstate->attnumlist) == 1);
13631365
else
13641366
CopyAttributeOutText(cstate, string);
13651367
}
@@ -2968,7 +2970,7 @@ CopyAttributeOutText(CopyState cstate, char *server_string)
29682970
*/
29692971
static void
29702972
CopyAttributeOutCSV(CopyState cstate, char *server_string,
2971-
bool use_quote)
2973+
bool use_quote, bool single_attr)
29722974
{
29732975
char *string;
29742976
char c;
@@ -2993,17 +2995,27 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
29932995
*/
29942996
if (!use_quote)
29952997
{
2996-
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
2997-
{
2998-
if (c == delimc || c == quotec || c == '\n' || c == '\r')
2998+
/*
2999+
* Because '\.' can be a data value, quote it if it appears
3000+
* alone on a line so it is not interpreted as the end-of-data
3001+
* marker.
3002+
*/
3003+
if (single_attr && strcmp(string, "\\.") == 0)
3004+
use_quote = true;
3005+
else
3006+
{
3007+
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
29993008
{
3000-
use_quote = true;
3001-
break;
3009+
if (c == delimc || c == quotec || c == '\n' || c == '\r')
3010+
{
3011+
use_quote = true;
3012+
break;
3013+
}
3014+
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
3015+
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
3016+
else
3017+
mblen = 1;
30023018
}
3003-
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
3004-
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
3005-
else
3006-
mblen = 1;
30073019
}
30083020
}
30093021

src/test/regress/expected/copy2.out

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,9 @@ COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
194194
--test that we read consecutive LFs properly
195195
CREATE TEMP TABLE testnl (a int, b text, c int);
196196
COPY testnl FROM stdin CSV;
197+
-- test end of copy marker
198+
CREATE TEMP TABLE testeoc (a text);
199+
COPY testeoc FROM stdin CSV;
197200
DROP TABLE x, y;
198201
DROP FUNCTION fn_x_before();
199202
DROP FUNCTION fn_x_after();

src/test/regress/sql/copy2.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,16 @@ COPY testnl FROM stdin CSV;
139139
inside",2
140140
\.
141141

142+
-- test end of copy marker
143+
CREATE TEMP TABLE testeoc (a text);
144+
145+
COPY testeoc FROM stdin CSV;
146+
a\.
147+
\.b
148+
c\.d
149+
"\."
150+
\.
151+
142152

143153
DROP TABLE x, y;
144154
DROP FUNCTION fn_x_before();

0 commit comments

Comments
 (0)