Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 7717f63

Browse files
Refactor COPY FROM to use format callback functions.
This commit introduces a new CopyFromRoutine struct, which is a set of callback routines to read tuples in a specific format. It also makes COPY FROM with the existing formats (text, CSV, and binary) utilize these format callbacks.

This change is a preliminary step towards making the COPY FROM command extensible in terms of input formats.

Similar to 2e4127b, this refactoring contributes to a performance improvement by reducing the number of "if" branches that need to be checked on a per-row basis when sending field representations in text or CSV mode. The performance benchmark results showed ~5% performance gain in text or CSV mode.

Author: Sutou Kouhei <kou@clear-code.com>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Tomas Vondra <tomas.vondra@enterprisedb.com>
Reviewed-by: Junwang Zhao <zhjwpku@gmail.com>
Discussion: https://postgr.es/m/20231204.153548.2126325458835528809.kou@clear-code.com
1 parent 77cb08b commit 7717f63

File tree

6 files changed

+459
-243
lines changed

6 files changed

+459
-243
lines changed

src/backend/commands/copyfrom.c

+150-42
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
#include "access/tableam.h"
2929
#include "access/xact.h"
3030
#include "catalog/namespace.h"
31-
#include "commands/copy.h"
31+
#include "commands/copyapi.h"
3232
#include "commands/copyfrom_internal.h"
3333
#include "commands/progress.h"
3434
#include "commands/trigger.h"
@@ -106,6 +106,145 @@ typedef struct CopyMultiInsertInfo
106106
/* non-export function prototypes */
107107
static void ClosePipeFromProgram(CopyFromState cstate);
108108

109+
/*
110+
* Built-in format-specific routines. One-row callbacks are defined in
111+
* copyfromparse.c.
112+
*/
113+
static void CopyFromTextLikeInFunc(CopyFromState cstate, Oid atttypid, FmgrInfo *finfo,
114+
Oid *typioparam);
115+
static void CopyFromTextLikeStart(CopyFromState cstate, TupleDesc tupDesc);
116+
static void CopyFromTextLikeEnd(CopyFromState cstate);
117+
static void CopyFromBinaryInFunc(CopyFromState cstate, Oid atttypid,
118+
FmgrInfo *finfo, Oid *typioparam);
119+
static void CopyFromBinaryStart(CopyFromState cstate, TupleDesc tupDesc);
120+
static void CopyFromBinaryEnd(CopyFromState cstate);
121+
122+
123+
/*
124+
* COPY FROM routines for built-in formats.
125+
*
126+
* CSV and text formats share the same TextLike routines except for the
127+
* one-row callback.
128+
*/
129+
130+
/* text format */
131+
static const CopyFromRoutine CopyFromRoutineText = {
132+
.CopyFromInFunc = CopyFromTextLikeInFunc,
133+
.CopyFromStart = CopyFromTextLikeStart,
134+
.CopyFromOneRow = CopyFromTextOneRow,
135+
.CopyFromEnd = CopyFromTextLikeEnd,
136+
};
137+
138+
/* CSV format */
139+
static const CopyFromRoutine CopyFromRoutineCSV = {
140+
.CopyFromInFunc = CopyFromTextLikeInFunc,
141+
.CopyFromStart = CopyFromTextLikeStart,
142+
.CopyFromOneRow = CopyFromCSVOneRow,
143+
.CopyFromEnd = CopyFromTextLikeEnd,
144+
};
145+
146+
/* binary format */
147+
static const CopyFromRoutine CopyFromRoutineBinary = {
148+
.CopyFromInFunc = CopyFromBinaryInFunc,
149+
.CopyFromStart = CopyFromBinaryStart,
150+
.CopyFromOneRow = CopyFromBinaryOneRow,
151+
.CopyFromEnd = CopyFromBinaryEnd,
152+
};
153+
154+
/* Return a COPY FROM routine for the given options */
155+
static const CopyFromRoutine *
156+
CopyFromGetRoutine(CopyFormatOptions opts)
157+
{
158+
if (opts.csv_mode)
159+
return &CopyFromRoutineCSV;
160+
else if (opts.binary)
161+
return &CopyFromRoutineBinary;
162+
163+
/* default is text */
164+
return &CopyFromRoutineText;
165+
}
166+
167+
/* Implementation of the start callback for text and CSV formats */
168+
static void
169+
CopyFromTextLikeStart(CopyFromState cstate, TupleDesc tupDesc)
170+
{
171+
AttrNumber attr_count;
172+
173+
/*
174+
* If encoding conversion is needed, we need another buffer to hold the
175+
* converted input data. Otherwise, we can just point input_buf to the
176+
* same buffer as raw_buf.
177+
*/
178+
if (cstate->need_transcoding)
179+
{
180+
cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1);
181+
cstate->input_buf_index = cstate->input_buf_len = 0;
182+
}
183+
else
184+
cstate->input_buf = cstate->raw_buf;
185+
cstate->input_reached_eof = false;
186+
187+
initStringInfo(&cstate->line_buf);
188+
189+
/*
190+
* Create workspace for CopyReadAttributes results; used by CSV and text
191+
* format.
192+
*/
193+
attr_count = list_length(cstate->attnumlist);
194+
cstate->max_fields = attr_count;
195+
cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *));
196+
}
197+
198+
/*
199+
* Implementation of the infunc callback for text and CSV formats. Assign
200+
* the input function data to the given *finfo.
201+
*/
202+
static void
203+
CopyFromTextLikeInFunc(CopyFromState cstate, Oid atttypid, FmgrInfo *finfo,
204+
Oid *typioparam)
205+
{
206+
Oid func_oid;
207+
208+
getTypeInputInfo(atttypid, &func_oid, typioparam);
209+
fmgr_info(func_oid, finfo);
210+
}
211+
212+
/* Implementation of the end callback for text and CSV formats */
213+
static void
214+
CopyFromTextLikeEnd(CopyFromState cstate)
215+
{
216+
/* nothing to do */
217+
}
218+
219+
/* Implementation of the start callback for binary format */
220+
static void
221+
CopyFromBinaryStart(CopyFromState cstate, TupleDesc tupDesc)
222+
{
223+
/* Read and verify binary header */
224+
ReceiveCopyBinaryHeader(cstate);
225+
}
226+
227+
/*
228+
* Implementation of the infunc callback for binary format. Assign
229+
* the binary input function to the given *finfo.
230+
*/
231+
static void
232+
CopyFromBinaryInFunc(CopyFromState cstate, Oid atttypid,
233+
FmgrInfo *finfo, Oid *typioparam)
234+
{
235+
Oid func_oid;
236+
237+
getTypeBinaryInputInfo(atttypid, &func_oid, typioparam);
238+
fmgr_info(func_oid, finfo);
239+
}
240+
241+
/* Implementation of the end callback for binary format */
242+
static void
243+
CopyFromBinaryEnd(CopyFromState cstate)
244+
{
245+
/* nothing to do */
246+
}
247+
109248
/*
110249
* error context callback for COPY FROM
111250
*
@@ -1403,7 +1542,6 @@ BeginCopyFrom(ParseState *pstate,
14031542
num_defaults;
14041543
FmgrInfo *in_functions;
14051544
Oid *typioparams;
1406-
Oid in_func_oid;
14071545
int *defmap;
14081546
ExprState **defexprs;
14091547
MemoryContext oldcontext;
@@ -1435,6 +1573,9 @@ BeginCopyFrom(ParseState *pstate,
14351573
/* Extract options from the statement node tree */
14361574
ProcessCopyOptions(pstate, &cstate->opts, true /* is_from */ , options);
14371575

1576+
/* Set the format routine */
1577+
cstate->routine = CopyFromGetRoutine(cstate->opts);
1578+
14381579
/* Process the target relation */
14391580
cstate->rel = rel;
14401581

@@ -1590,25 +1731,6 @@ BeginCopyFrom(ParseState *pstate,
15901731
cstate->raw_buf_index = cstate->raw_buf_len = 0;
15911732
cstate->raw_reached_eof = false;
15921733

1593-
if (!cstate->opts.binary)
1594-
{
1595-
/*
1596-
* If encoding conversion is needed, we need another buffer to hold
1597-
* the converted input data. Otherwise, we can just point input_buf
1598-
* to the same buffer as raw_buf.
1599-
*/
1600-
if (cstate->need_transcoding)
1601-
{
1602-
cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1);
1603-
cstate->input_buf_index = cstate->input_buf_len = 0;
1604-
}
1605-
else
1606-
cstate->input_buf = cstate->raw_buf;
1607-
cstate->input_reached_eof = false;
1608-
1609-
initStringInfo(&cstate->line_buf);
1610-
}
1611-
16121734
initStringInfo(&cstate->attribute_buf);
16131735

16141736
/* Assign range table and rteperminfos, we'll need them in CopyFrom. */
@@ -1641,13 +1763,9 @@ BeginCopyFrom(ParseState *pstate,
16411763
continue;
16421764

16431765
/* Fetch the input function and typioparam info */
1644-
if (cstate->opts.binary)
1645-
getTypeBinaryInputInfo(att->atttypid,
1646-
&in_func_oid, &typioparams[attnum - 1]);
1647-
else
1648-
getTypeInputInfo(att->atttypid,
1649-
&in_func_oid, &typioparams[attnum - 1]);
1650-
fmgr_info(in_func_oid, &in_functions[attnum - 1]);
1766+
cstate->routine->CopyFromInFunc(cstate, att->atttypid,
1767+
&in_functions[attnum - 1],
1768+
&typioparams[attnum - 1]);
16511769

16521770
/* Get default info if available */
16531771
defexprs[attnum - 1] = NULL;
@@ -1782,20 +1900,7 @@ BeginCopyFrom(ParseState *pstate,
17821900

17831901
pgstat_progress_update_multi_param(3, progress_cols, progress_vals);
17841902

1785-
if (cstate->opts.binary)
1786-
{
1787-
/* Read and verify binary header */
1788-
ReceiveCopyBinaryHeader(cstate);
1789-
}
1790-
1791-
/* create workspace for CopyReadAttributes results */
1792-
if (!cstate->opts.binary)
1793-
{
1794-
AttrNumber attr_count = list_length(cstate->attnumlist);
1795-
1796-
cstate->max_fields = attr_count;
1797-
cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *));
1798-
}
1903+
cstate->routine->CopyFromStart(cstate, tupDesc);
17991904

18001905
MemoryContextSwitchTo(oldcontext);
18011906

@@ -1808,6 +1913,9 @@ BeginCopyFrom(ParseState *pstate,
18081913
void
18091914
EndCopyFrom(CopyFromState cstate)
18101915
{
1916+
/* Invoke the end callback */
1917+
cstate->routine->CopyFromEnd(cstate);
1918+
18111919
/* No COPY FROM related resources except memory. */
18121920
if (cstate->is_program)
18131921
{

0 commit comments

Comments
 (0)