Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0a5fdb0

Browse files
committed
Reduce per-character overhead in COPY OUT by combining calls to
CopySendData.
1 parent c76cb77 commit 0a5fdb0

File tree

1 file changed

+90
-42
lines changed

1 file changed

+90
-42
lines changed

src/backend/commands/copy.c

Lines changed: 90 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.264 2006/05/21 20:05:19 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.265 2006/05/25 18:42:17 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -243,8 +243,8 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
243243
int column_no, FmgrInfo *flinfo,
244244
Oid typioparam, int32 typmod,
245245
bool *isnull);
246-
static void CopyAttributeOutText(CopyState cstate, char *server_string);
247-
static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
246+
static void CopyAttributeOutText(CopyState cstate, char *string);
247+
static void CopyAttributeOutCSV(CopyState cstate, char *string,
248248
bool use_quote, bool single_attr);
249249
static List *CopyGetAttnums(Relation rel, List *attnamelist);
250250
static char *limit_printout_length(const char *str);
@@ -2884,91 +2884,123 @@ CopyReadBinaryAttribute(CopyState cstate,
28842884
/*
28852885
* Send text representation of one attribute, with conversion and escaping
28862886
*/
2887+
#define DUMPSOFAR() \
2888+
do { \
2889+
if (ptr > start) \
2890+
CopySendData(cstate, start, ptr - start); \
2891+
} while (0)
2892+
28872893
static void
2888-
CopyAttributeOutText(CopyState cstate, char *server_string)
2894+
CopyAttributeOutText(CopyState cstate, char *string)
28892895
{
2890-
char *string;
2896+
char *ptr;
2897+
char *start;
28912898
char c;
28922899
char delimc = cstate->delim[0];
2893-
int mblen;
28942900

28952901
if (cstate->need_transcoding)
2896-
string = pg_server_to_client(server_string, strlen(server_string));
2902+
ptr = pg_server_to_client(string, strlen(string));
28972903
else
2898-
string = server_string;
2904+
ptr = string;
28992905

2900-
for (; (c = *string) != '\0'; string += mblen)
2906+
/*
2907+
* We have to grovel through the string searching for control characters
2908+
* and instances of the delimiter character. In most cases, though, these
2909+
* are infrequent. To avoid overhead from calling CopySendData once per
2910+
* character, we dump out all characters between replaceable characters
2911+
* in a single call. The loop invariant is that the data from "start"
2912+
* to "ptr" can be sent literally, but hasn't yet been.
2913+
*/
2914+
start = ptr;
2915+
while ((c = *ptr) != '\0')
29012916
{
2902-
mblen = 1;
2903-
29042917
switch (c)
29052918
{
29062919
case '\b':
2920+
DUMPSOFAR();
29072921
CopySendString(cstate, "\\b");
2922+
start = ++ptr;
29082923
break;
29092924
case '\f':
2925+
DUMPSOFAR();
29102926
CopySendString(cstate, "\\f");
2927+
start = ++ptr;
29112928
break;
29122929
case '\n':
2930+
DUMPSOFAR();
29132931
CopySendString(cstate, "\\n");
2932+
start = ++ptr;
29142933
break;
29152934
case '\r':
2935+
DUMPSOFAR();
29162936
CopySendString(cstate, "\\r");
2937+
start = ++ptr;
29172938
break;
29182939
case '\t':
2940+
DUMPSOFAR();
29192941
CopySendString(cstate, "\\t");
2942+
start = ++ptr;
29202943
break;
29212944
case '\v':
2945+
DUMPSOFAR();
29222946
CopySendString(cstate, "\\v");
2947+
start = ++ptr;
29232948
break;
29242949
case '\\':
2950+
DUMPSOFAR();
29252951
CopySendString(cstate, "\\\\");
2952+
start = ++ptr;
29262953
break;
29272954
default:
29282955
if (c == delimc)
2956+
{
2957+
DUMPSOFAR();
29292958
CopySendChar(cstate, '\\');
2959+
start = ptr; /* we include char in next run */
2960+
}
29302961

29312962
/*
29322963
* We can skip pg_encoding_mblen() overhead when encoding is
29332964
* safe, because in valid backend encodings, extra bytes of a
29342965
* multibyte character never look like ASCII.
29352966
*/
2936-
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
2937-
mblen = pg_encoding_mblen(cstate->client_encoding, string);
2938-
CopySendData(cstate, string, mblen);
2967+
if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
2968+
ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
2969+
else
2970+
ptr++;
29392971
break;
29402972
}
29412973
}
2974+
2975+
DUMPSOFAR();
29422976
}
29432977

29442978
/*
2945-
* Send CSV representation of one attribute, with conversion and
2946-
* CSV type escaping
2979+
* Send text representation of one attribute, with conversion and
2980+
* CSV-style escaping
29472981
*/
29482982
static void
2949-
CopyAttributeOutCSV(CopyState cstate, char *server_string,
2983+
CopyAttributeOutCSV(CopyState cstate, char *string,
29502984
bool use_quote, bool single_attr)
29512985
{
2952-
char *string;
2986+
char *ptr;
2987+
char *start;
29532988
char c;
29542989
char delimc = cstate->delim[0];
29552990
char quotec = cstate->quote[0];
29562991
char escapec = cstate->escape[0];
2957-
char *tstring;
2958-
int mblen;
29592992

2960-
/* force quoting if it matches null_print */
2961-
if (!use_quote && strcmp(server_string, cstate->null_print) == 0)
2993+
/* force quoting if it matches null_print (before conversion!) */
2994+
if (!use_quote && strcmp(string, cstate->null_print) == 0)
29622995
use_quote = true;
29632996

29642997
if (cstate->need_transcoding)
2965-
string = pg_server_to_client(server_string, strlen(server_string));
2998+
ptr = pg_server_to_client(string, strlen(string));
29662999
else
2967-
string = server_string;
3000+
ptr = string;
29683001

29693002
/*
2970-
* have to run through the string twice, first time to see if it needs
2971-
* quoting, second to actually send it
3003+
* Make a preliminary pass to discover if it needs quoting
29723004
*/
29733005
if (!use_quote)
29743006
{
@@ -2977,41 +3009,57 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
29773009
* alone on a line so it is not interpreted as the end-of-data
29783010
* marker.
29793011
*/
2980-
if (single_attr && strcmp(string, "\\.") == 0)
3012+
if (single_attr && strcmp(ptr, "\\.") == 0)
29813013
use_quote = true;
29823014
else
29833015
{
2984-
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
3016+
char *tptr = ptr;
3017+
3018+
while ((c = *tptr) != '\0')
29853019
{
29863020
if (c == delimc || c == quotec || c == '\n' || c == '\r')
29873021
{
29883022
use_quote = true;
29893023
break;
29903024
}
2991-
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
2992-
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
3025+
if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3026+
tptr += pg_encoding_mblen(cstate->client_encoding, tptr);
29933027
else
2994-
mblen = 1;
3028+
tptr++;
29953029
}
29963030
}
29973031
}
29983032

29993033
if (use_quote)
3034+
{
30003035
CopySendChar(cstate, quotec);
30013036

3002-
for (; (c = *string) != '\0'; string += mblen)
3003-
{
3004-
if (use_quote && (c == quotec || c == escapec))
3005-
CopySendChar(cstate, escapec);
3006-
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
3007-
mblen = pg_encoding_mblen(cstate->client_encoding, string);
3008-
else
3009-
mblen = 1;
3010-
CopySendData(cstate, string, mblen);
3011-
}
3037+
/*
3038+
* We adopt the same optimization strategy as in CopyAttributeOutText
3039+
*/
3040+
start = ptr;
3041+
while ((c = *ptr) != '\0')
3042+
{
3043+
if (c == quotec || c == escapec)
3044+
{
3045+
DUMPSOFAR();
3046+
CopySendChar(cstate, escapec);
3047+
start = ptr; /* we include char in next run */
3048+
}
3049+
if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3050+
ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
3051+
else
3052+
ptr++;
3053+
}
3054+
DUMPSOFAR();
30123055

3013-
if (use_quote)
30143056
CopySendChar(cstate, quotec);
3057+
}
3058+
else
3059+
{
3060+
/* If it doesn't need quoting, we can just dump it as-is */
3061+
CopySendString(cstate, ptr);
3062+
}
30153063
}
30163064

30173065
/*

0 commit comments

Comments
 (0)