Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 95c238d

Browse files
committed
Improve efficiency of attribute scanning in CopyReadAttributesCSV.
The loop is split into two parts, one for scanning inside quotes and one for scanning outside quotes, saving some instructions in both parts. (Heikki Linnakangas)
1 parent 9c767ad commit 95c238d

File tree

1 file changed

+64
-50
lines changed

1 file changed

+64
-50
lines changed

src/backend/commands/copy.c

Lines changed: 64 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.295 2008/01/01 19:45:48 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.296 2008/03/08 01:16:26 adunstan Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -2913,7 +2913,6 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
29132913
for (;;)
29142914
{
29152915
bool found_delim = false;
2916-
bool in_quote = false;
29172916
bool saw_quote = false;
29182917
char *start_ptr;
29192918
char *end_ptr;
@@ -2929,72 +2928,87 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
29292928
start_ptr = cur_ptr;
29302929
fieldvals[fieldno] = output_ptr;
29312930

2932-
/* Scan data for field */
2931+
/* Scan data for field,
2932+
*
2933+
* The loop starts in "not quote" mode and then toggles between
2934+
* that and "in quote" mode.
2935+
* The loop exits normally if it is in "not quote" mode and a
2936+
* delimiter or line end is seen.
2937+
*/
29332938
for (;;)
29342939
{
29352940
char c;
29362941

2937-
end_ptr = cur_ptr;
2938-
if (cur_ptr >= line_end_ptr)
2939-
break;
2940-
c = *cur_ptr++;
2941-
/* unquoted field delimiter */
2942-
if (c == delimc && !in_quote)
2943-
{
2944-
found_delim = true;
2945-
break;
2946-
}
2947-
/* start of quoted field (or part of field) */
2948-
if (c == quotec && !in_quote)
2942+
/* Not in quote */
2943+
for (;;)
29492944
{
2950-
saw_quote = true;
2951-
in_quote = true;
2952-
continue;
2945+
end_ptr = cur_ptr;
2946+
if (cur_ptr >= line_end_ptr)
2947+
goto endfield;
2948+
c = *cur_ptr++;
2949+
/* unquoted field delimiter */
2950+
if (c == delimc)
2951+
{
2952+
found_delim = true;
2953+
goto endfield;
2954+
}
2955+
/* start of quoted field (or part of field) */
2956+
if (c == quotec)
2957+
{
2958+
saw_quote = true;
2959+
break;
2960+
}
2961+
/* Add c to output string */
2962+
*output_ptr++ = c;
29532963
}
2954-
/* escape within a quoted field */
2955-
if (c == escapec && in_quote)
2964+
2965+
/* In quote */
2966+
for (;;)
29562967
{
2957-
/*
2958-
* peek at the next char if available, and escape it if it is
2959-
* an escape char or a quote char
2960-
*/
2961-
if (cur_ptr < line_end_ptr)
2962-
{
2963-
char nextc = *cur_ptr;
2968+
end_ptr = cur_ptr;
2969+
if (cur_ptr >= line_end_ptr)
2970+
ereport(ERROR,
2971+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2972+
errmsg("unterminated CSV quoted field")));
2973+
2974+
c = *cur_ptr++;
29642975

2965-
if (nextc == escapec || nextc == quotec)
2976+
/* escape within a quoted field */
2977+
if (c == escapec)
2978+
{
2979+
/*
2980+
* peek at the next char if available, and escape it if it is
2981+
* an escape char or a quote char
2982+
*/
2983+
if (cur_ptr < line_end_ptr)
29662984
{
2967-
*output_ptr++ = nextc;
2968-
cur_ptr++;
2969-
continue;
2985+
char nextc = *cur_ptr;
2986+
2987+
if (nextc == escapec || nextc == quotec)
2988+
{
2989+
*output_ptr++ = nextc;
2990+
cur_ptr++;
2991+
continue;
2992+
}
29702993
}
29712994
}
2972-
}
2995+
/*
2996+
* end of quoted field. Must do this test after testing for escape
2997+
* in case quote char and escape char are the same (which is the
2998+
* common case).
2999+
*/
3000+
if (c == quotec)
3001+
break;
29733002

2974-
/*
2975-
* end of quoted field. Must do this test after testing for escape
2976-
* in case quote char and escape char are the same (which is the
2977-
* common case).
2978-
*/
2979-
if (c == quotec && in_quote)
2980-
{
2981-
in_quote = false;
2982-
continue;
3003+
/* Add c to output string */
3004+
*output_ptr++ = c;
29833005
}
2984-
2985-
/* Add c to output string */
2986-
*output_ptr++ = c;
29873006
}
3007+
endfield:
29883008

29893009
/* Terminate attribute value in output area */
29903010
*output_ptr++ = '\0';
29913011

2992-
/* Shouldn't still be in quote mode */
2993-
if (in_quote)
2994-
ereport(ERROR,
2995-
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2996-
errmsg("unterminated CSV quoted field")));
2997-
29983012
/* Check whether raw input matched null marker */
29993013
input_len = end_ptr - start_ptr;
30003014
if (!saw_quote && input_len == cstate->null_print_len &&

0 commit comments

Comments
 (0)