Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0a0727c

Browse files
committed
Improve performance of binary COPY FROM through better buffering.
At least on Linux and macOS, fread() turns out to have far higher per-call overhead than one could wish. Reading 64KB of data at a time and then parceling it out with our own memcpy logic makes binary COPY from a file significantly faster --- around 30% in simple testing for cases with narrow text columns (on Linux ... even more on macOS).

In binary COPY from frontend, there's no per-call fread(), and this patch introduces an extra layer of memcpy'ing, but it still manages to eke out a small win. Apparently, the control-logic overhead in CopyGetData() is enough to be worth avoiding for small fetches.

Bharath Rupireddy and Amit Langote, reviewed by Vignesh C, cosmetic tweaks by me

Discussion: https://postgr.es/m/CALj2ACU5Bz06HWLwqSzNMN=Gupoj6Rcn_QVC+k070V4em9wu=A@mail.gmail.com
1 parent 8a37951 commit 0a0727c

File tree

1 file changed

+83
-35
lines changed

1 file changed

+83
-35
lines changed

src/backend/commands/copy.c

Lines changed: 83 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -187,15 +187,15 @@ typedef struct CopyStateData
187187
TransitionCaptureState *transition_capture;
188188

189189
/*
190-
* These variables are used to reduce overhead in textual COPY FROM.
190+
* These variables are used to reduce overhead in COPY FROM.
191191
*
192192
* attribute_buf holds the separated, de-escaped text for each field of
193193
* the current line. The CopyReadAttributes functions return arrays of
194194
* pointers into this buffer. We avoid palloc/pfree overhead by re-using
195195
* the buffer on each cycle.
196196
*
197-
* (In binary COPY FROM, attribute_buf holds the binary data for the
198-
* current field, while the other variables are not used.)
197+
* In binary COPY FROM, attribute_buf holds the binary data for the
198+
* current field, but the usage is otherwise similar.
199199
*/
200200
StringInfoData attribute_buf;
201201

@@ -209,23 +209,27 @@ typedef struct CopyStateData
209209
* input cycle is first to read the whole line into line_buf, convert it
210210
* to server encoding there, and then extract the individual attribute
211211
* fields into attribute_buf. line_buf is preserved unmodified so that we
212-
* can display it in error messages if appropriate.
212+
* can display it in error messages if appropriate. (In binary mode,
213+
* line_buf is not used.)
213214
*/
214215
StringInfoData line_buf;
215216
bool line_buf_converted; /* converted to server encoding? */
216217
bool line_buf_valid; /* contains the row being processed? */
217218

218219
/*
219220
* Finally, raw_buf holds raw data read from the data source (file or
220-
* client connection). CopyReadLine parses this data sufficiently to
221-
* locate line boundaries, then transfers the data to line_buf and
222-
* converts it. Note: we guarantee that there is a \0 at
223-
* raw_buf[raw_buf_len].
221+
* client connection). In text mode, CopyReadLine parses this data
222+
* sufficiently to locate line boundaries, then transfers the data to
223+
* line_buf and converts it. In binary mode, CopyReadBinaryData fetches
224+
* appropriate amounts of data from this buffer. In both modes, we
225+
* guarantee that there is a \0 at raw_buf[raw_buf_len].
224226
*/
225227
#define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
226228
char *raw_buf;
227229
int raw_buf_index; /* next byte to process */
228230
int raw_buf_len; /* total # of bytes stored */
231+
/* Shorthand for number of unconsumed bytes available in raw_buf */
232+
#define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
229233
} CopyStateData;
230234

231235
/* DestReceiver for COPY (query) TO */
@@ -394,6 +398,8 @@ static void CopySendInt32(CopyState cstate, int32 val);
394398
static bool CopyGetInt32(CopyState cstate, int32 *val);
395399
static void CopySendInt16(CopyState cstate, int16 val);
396400
static bool CopyGetInt16(CopyState cstate, int16 *val);
401+
static bool CopyLoadRawBuf(CopyState cstate);
402+
static int CopyReadBinaryData(CopyState cstate, char *dest, int nbytes);
397403

398404

399405
/*
@@ -723,7 +729,7 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
723729
/*
724730
* CopySendInt32 sends an int32 in network byte order
725731
*/
726-
static void
732+
static inline void
727733
CopySendInt32(CopyState cstate, int32 val)
728734
{
729735
uint32 buf;
@@ -737,12 +743,12 @@ CopySendInt32(CopyState cstate, int32 val)
737743
*
738744
* Returns true if OK, false if EOF
739745
*/
740-
static bool
746+
static inline bool
741747
CopyGetInt32(CopyState cstate, int32 *val)
742748
{
743749
uint32 buf;
744750

745-
if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
751+
if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
746752
{
747753
*val = 0; /* suppress compiler warning */
748754
return false;
@@ -754,7 +760,7 @@ CopyGetInt32(CopyState cstate, int32 *val)
754760
/*
755761
* CopySendInt16 sends an int16 in network byte order
756762
*/
757-
static void
763+
static inline void
758764
CopySendInt16(CopyState cstate, int16 val)
759765
{
760766
uint16 buf;
@@ -766,12 +772,12 @@ CopySendInt16(CopyState cstate, int16 val)
766772
/*
767773
* CopyGetInt16 reads an int16 that appears in network byte order
768774
*/
769-
static bool
775+
static inline bool
770776
CopyGetInt16(CopyState cstate, int16 *val)
771777
{
772778
uint16 buf;
773779

774-
if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
780+
if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
775781
{
776782
*val = 0; /* suppress compiler warning */
777783
return false;
@@ -786,26 +792,20 @@ CopyGetInt16(CopyState cstate, int16 *val)
786792
*
787793
* Returns true if able to obtain at least one more byte, else false.
788794
*
789-
* If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
790-
* down to the start of the buffer and then we load more data after that.
791-
* This case is used only when a frontend multibyte character crosses a
792-
* bufferload boundary.
795+
* If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
796+
* of the buffer and then we load more data after that. This case occurs only
797+
* when a multibyte character crosses a bufferload boundary.
793798
*/
794799
static bool
795800
CopyLoadRawBuf(CopyState cstate)
796801
{
797-
int nbytes;
802+
int nbytes = RAW_BUF_BYTES(cstate);
798803
int inbytes;
799804

800-
if (cstate->raw_buf_index < cstate->raw_buf_len)
801-
{
802-
/* Copy down the unprocessed data */
803-
nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
805+
/* Copy down the unprocessed data if any. */
806+
if (nbytes > 0)
804807
memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
805808
nbytes);
806-
}
807-
else
808-
nbytes = 0; /* no data need be saved */
809809

810810
inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
811811
1, RAW_BUF_SIZE - nbytes);
@@ -816,6 +816,54 @@ CopyLoadRawBuf(CopyState cstate)
816816
return (inbytes > 0);
817817
}
818818

819+
/*
820+
* CopyReadBinaryData
821+
*
822+
* Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
823+
* and writes them to 'dest'. Returns the number of bytes read (which
824+
* would be less than 'nbytes' only if we reach EOF).
825+
*/
826+
static int
827+
CopyReadBinaryData(CopyState cstate, char *dest, int nbytes)
828+
{
829+
int copied_bytes = 0;
830+
831+
if (RAW_BUF_BYTES(cstate) >= nbytes)
832+
{
833+
/* Enough bytes are present in the buffer. */
834+
memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
835+
cstate->raw_buf_index += nbytes;
836+
copied_bytes = nbytes;
837+
}
838+
else
839+
{
840+
/*
841+
* Not enough bytes in the buffer, so must read from the file. Need
842+
* to loop since 'nbytes' could be larger than the buffer size.
843+
*/
844+
do
845+
{
846+
int copy_bytes;
847+
848+
/* Load more data if buffer is empty. */
849+
if (RAW_BUF_BYTES(cstate) == 0)
850+
{
851+
if (!CopyLoadRawBuf(cstate))
852+
break; /* EOF */
853+
}
854+
855+
/* Transfer some bytes. */
856+
copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
857+
memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
858+
cstate->raw_buf_index += copy_bytes;
859+
dest += copy_bytes;
860+
copied_bytes += copy_bytes;
861+
} while (copied_bytes < nbytes);
862+
}
863+
864+
return copied_bytes;
865+
}
866+
819867

820868
/*
821869
* DoCopy executes the SQL COPY statement
@@ -3366,17 +3414,17 @@ BeginCopyFrom(ParseState *pstate,
33663414
cstate->cur_attval = NULL;
33673415

33683416
/*
3369-
* Set up variables to avoid per-attribute overhead. attribute_buf is
3370-
* used in both text and binary modes, but we use line_buf and raw_buf
3417+
* Set up variables to avoid per-attribute overhead. attribute_buf and
3418+
* raw_buf are used in both text and binary modes, but we use line_buf
33713419
* only in text mode.
33723420
*/
33733421
initStringInfo(&cstate->attribute_buf);
3422+
cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
3423+
cstate->raw_buf_index = cstate->raw_buf_len = 0;
33743424
if (!cstate->binary)
33753425
{
33763426
initStringInfo(&cstate->line_buf);
33773427
cstate->line_buf_converted = false;
3378-
cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
3379-
cstate->raw_buf_index = cstate->raw_buf_len = 0;
33803428
}
33813429

33823430
/* Assign range table, we'll need it in CopyFrom. */
@@ -3527,7 +3575,7 @@ BeginCopyFrom(ParseState *pstate,
35273575
int32 tmp;
35283576

35293577
/* Signature */
3530-
if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
3578+
if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
35313579
memcmp(readSig, BinarySignature, 11) != 0)
35323580
ereport(ERROR,
35333581
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
@@ -3555,7 +3603,7 @@ BeginCopyFrom(ParseState *pstate,
35553603
/* Skip extension header, if present */
35563604
while (tmp-- > 0)
35573605
{
3558-
if (CopyGetData(cstate, readSig, 1, 1) != 1)
3606+
if (CopyReadBinaryData(cstate, readSig, 1) != 1)
35593607
ereport(ERROR,
35603608
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
35613609
errmsg("invalid COPY file header (wrong length)")));
@@ -3771,7 +3819,7 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
37713819
char dummy;
37723820

37733821
if (cstate->copy_dest != COPY_OLD_FE &&
3774-
CopyGetData(cstate, &dummy, 1, 1) > 0)
3822+
CopyReadBinaryData(cstate, &dummy, 1) > 0)
37753823
ereport(ERROR,
37763824
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
37773825
errmsg("received copy data after EOF marker")));
@@ -4744,8 +4792,8 @@ CopyReadBinaryAttribute(CopyState cstate, FmgrInfo *flinfo,
47444792
resetStringInfo(&cstate->attribute_buf);
47454793

47464794
enlargeStringInfo(&cstate->attribute_buf, fld_size);
4747-
if (CopyGetData(cstate, cstate->attribute_buf.data,
4748-
fld_size, fld_size) != fld_size)
4795+
if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
4796+
fld_size) != fld_size)
47494797
ereport(ERROR,
47504798
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
47514799
errmsg("unexpected EOF in COPY data")));

0 commit comments

Comments
 (0)