Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4ac2a9b

Browse files
committed
Add REJECT_LIMIT option to the COPY command.
Previously, when ON_ERROR was set to 'ignore', the COPY command would skip all rows with data type conversion errors, with no way to limit the number of skipped rows before failing. This commit introduces the REJECT_LIMIT option, allowing users to specify the maximum number of erroneous rows that can be skipped. If more rows encounter data type conversion errors than allowed by REJECT_LIMIT, the COPY command will fail with an error, even when ON_ERROR = 'ignore'. Author: Atsushi Torikoshi Reviewed-by: Junwang Zhao, Kirill Reshke, jian he, Fujii Masao Discussion: https://postgr.es/m/63f99327aa6b404cc951217fa3e61fe4@oss.nttdata.com
1 parent d759c1a commit 4ac2a9b

File tree

6 files changed

+91
-0
lines changed

6 files changed

+91
-0
lines changed

doc/src/sgml/ref/copy.sgml

+19
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
4444
FORCE_NOT_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
4545
FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
4646
ON_ERROR <replaceable class="parameter">error_action</replaceable>
47+
REJECT_LIMIT <replaceable class="parameter">maxerror</replaceable>
4748
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
4849
LOG_VERBOSITY <replaceable class="parameter">verbosity</replaceable>
4950
</synopsis>
@@ -413,6 +414,24 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
413414
</listitem>
414415
</varlistentry>
415416

417+
<varlistentry>
418+
<term><literal>REJECT_LIMIT</literal></term>
419+
<listitem>
420+
<para>
421+
Specifies the maximum number of errors tolerated while converting a
422+
column's input value to its data type, when <literal>ON_ERROR</literal> is
423+
set to <literal>ignore</literal>.
424+
If the input causes more errors than the specified value, the <command>COPY</command>
425+
command fails, even with <literal>ON_ERROR</literal> set to <literal>ignore</literal>.
426+
This clause must be used with <literal>ON_ERROR</literal>=<literal>ignore</literal>
427+
and <replaceable class="parameter">maxerror</replaceable> must be a positive <type>bigint</type>.
428+
If not specified, <literal>ON_ERROR</literal>=<literal>ignore</literal>
429+
allows an unlimited number of errors, meaning <command>COPY</command> will
430+
skip all erroneous data.
431+
</para>
432+
</listitem>
433+
</varlistentry>
434+
416435
<varlistentry>
417436
<term><literal>ENCODING</literal></term>
418437
<listitem>

src/backend/commands/copy.c

+33
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,23 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
418418
return COPY_ON_ERROR_STOP; /* keep compiler quiet */
419419
}
420420

421+
/*
422+
* Extract REJECT_LIMIT value from a DefElem.
*
* The value is read with defGetInt64().  A value less than or equal to zero
* is reported through ereport(ERROR, ...) with
* ERRCODE_INVALID_PARAMETER_VALUE; the offending value is included in the
* message, cast to long long to match the %lld format specifier.
* Otherwise the value is returned to the caller unchanged.
423+
*/
424+
static int64
425+
defGetCopyRejectLimitOption(DefElem *def)
426+
{
427+
int64 reject_limit = defGetInt64(def);
428+
429+
if (reject_limit <= 0)
430+
ereport(ERROR,
431+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
432+
errmsg("REJECT_LIMIT (%lld) must be greater than zero",
433+
(long long) reject_limit)));
434+
435+
return reject_limit;
436+
}
437+
421438
/*
422439
* Extract a CopyLogVerbosityChoice value from a DefElem.
423440
*/
@@ -472,6 +489,7 @@ ProcessCopyOptions(ParseState *pstate,
472489
bool header_specified = false;
473490
bool on_error_specified = false;
474491
bool log_verbosity_specified = false;
492+
bool reject_limit_specified = false;
475493
ListCell *option;
476494

477495
/* Support external use for option sanity checking */
@@ -638,6 +656,13 @@ ProcessCopyOptions(ParseState *pstate,
638656
log_verbosity_specified = true;
639657
opts_out->log_verbosity = defGetCopyLogVerbosityChoice(defel, pstate);
640658
}
659+
else if (strcmp(defel->defname, "reject_limit") == 0)
660+
{
661+
if (reject_limit_specified)
662+
errorConflictingDefElem(defel, pstate);
663+
reject_limit_specified = true;
664+
opts_out->reject_limit = defGetCopyRejectLimitOption(defel);
665+
}
641666
else
642667
ereport(ERROR,
643668
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -874,6 +899,14 @@ ProcessCopyOptions(ParseState *pstate,
874899
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
875900
errmsg("NULL specification and DEFAULT specification cannot be the same")));
876901
}
902+
/* Check on_error */
903+
if (opts_out->reject_limit && !opts_out->on_error)
904+
ereport(ERROR,
905+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
906+
/*- translator: first and second %s are the names of COPY options, e.g.
907+
* ON_ERROR, third is the value of the COPY option, e.g. IGNORE */
908+
errmsg("COPY %s requires %s to be set to %s",
909+
"REJECT_LIMIT", "ON_ERROR", "IGNORE")));
877910
}
878911

879912
/*

src/backend/commands/copyfrom.c

+7
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,13 @@ CopyFrom(CopyFromState cstate)
10181018
pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
10191019
cstate->num_errors);
10201020

1021+
if (cstate->opts.reject_limit > 0 && \
1022+
cstate->num_errors > cstate->opts.reject_limit)
1023+
ereport(ERROR,
1024+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1025+
errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
1026+
(long long) cstate->opts.reject_limit)));
1027+
10211028
/* Repeat NextCopyFrom() until no soft error occurs */
10221029
continue;
10231030
}

src/include/commands/copy.h

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ typedef struct CopyFormatOptions
8585
bool convert_selectively; /* do selective binary conversion? */
8686
CopyOnErrorChoice on_error; /* what to do when error happened */
8787
CopyLogVerbosityChoice log_verbosity; /* verbosity of logged messages */
88+
int64 reject_limit; /* maximum tolerable number of errors */
8889
List *convert_select; /* list of column names (can be NIL) */
8990
} CopyFormatOptions;
9091

src/test/regress/expected/copy2.out

+10
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ COPY x to stdout (log_verbosity unsupported);
116116
ERROR: COPY LOG_VERBOSITY "unsupported" not recognized
117117
LINE 1: COPY x to stdout (log_verbosity unsupported);
118118
^
119+
COPY x from stdin with (reject_limit 1);
120+
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
121+
COPY x from stdin with (on_error ignore, reject_limit 0);
122+
ERROR: REJECT_LIMIT (0) must be greater than zero
119123
-- too many columns in column list: should fail
120124
COPY x (a, b, c, d, e, d, c) from stdin;
121125
ERROR: column "d" specified more than once
@@ -791,6 +795,12 @@ CONTEXT: COPY check_ign_err, line 1: "1 {1}"
791795
COPY check_ign_err FROM STDIN WITH (on_error ignore);
792796
ERROR: extra data after last expected column
793797
CONTEXT: COPY check_ign_err, line 1: "1 {1} 3 abc"
798+
-- tests for reject_limit option
799+
COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 3);
800+
ERROR: skipped more than REJECT_LIMIT (3) rows due to data type incompatibility
801+
CONTEXT: COPY check_ign_err, line 5, column n: ""
802+
COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4);
803+
NOTICE: 4 rows were skipped due to data type incompatibility
794804
-- clean up
795805
DROP TABLE forcetest;
796806
DROP TABLE vistest;

src/test/regress/sql/copy2.sql

+21
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ COPY x to stdout (format TEXT, force_null(a));
8282
COPY x to stdin (format CSV, force_null(a));
8383
COPY x to stdin (format BINARY, on_error unsupported);
8484
COPY x to stdout (log_verbosity unsupported);
85+
COPY x from stdin with (reject_limit 1);
86+
COPY x from stdin with (on_error ignore, reject_limit 0);
8587

8688
-- too many columns in column list: should fail
8789
COPY x (a, b, c, d, e, d, c) from stdin;
@@ -561,6 +563,25 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore);
561563
1 {1} 3 abc
562564
\.
563565

566+
-- tests for reject_limit option
567+
COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 3);
568+
6 {6} 6
569+
a {7} 7
570+
8 {8} 8888888888
571+
9 {a, 9} 9
572+
573+
10 {10} 10
574+
\.
575+
576+
COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4);
577+
6 {6} 6
578+
a {7} 7
579+
8 {8} 8888888888
580+
9 {a, 9} 9
581+
582+
10 {10} 10
583+
\.
584+
564585
-- clean up
565586
DROP TABLE forcetest;
566587
DROP TABLE vistest;

0 commit comments

Comments
 (0)