From 7103ebb7aae8ab8076b7e85f335ceb8fe799097c Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Mon, 28 Mar 2022 16:45:58 +0200 Subject: Add support for MERGE SQL command MERGE performs actions that modify rows in the target table using a source table or query. MERGE provides a single SQL statement that can conditionally INSERT/UPDATE/DELETE rows -- a task that would otherwise require multiple PL statements. For example, MERGE INTO target AS t USING source AS s ON t.tid = s.sid WHEN MATCHED AND t.balance > s.delta THEN UPDATE SET balance = t.balance - s.delta WHEN MATCHED THEN DELETE WHEN NOT MATCHED AND s.delta > 0 THEN INSERT VALUES (s.sid, s.delta) WHEN NOT MATCHED THEN DO NOTHING; MERGE works with regular tables, partitioned tables and inheritance hierarchies, including column and row security enforcement, as well as support for row and statement triggers and transition tables therein. MERGE is optimized for OLTP and is parameterizable, though also useful for large scale ETL/ELT. MERGE is not intended to be used in preference to existing single SQL commands for INSERT, UPDATE or DELETE since there is some overhead. MERGE can be used from PL/pgSQL. MERGE does not support targeting updatable views or foreign tables, and RETURNING clauses are not allowed either. These limitations are likely fixable with sufficient effort. Rewrite rules are also not supported, but it's not clear that we'd want to support them. 
Author: Pavan Deolasee Author: Álvaro Herrera Author: Amit Langote Author: Simon Riggs Reviewed-by: Peter Eisentraut Reviewed-by: Andres Freund (earlier versions) Reviewed-by: Peter Geoghegan (earlier versions) Reviewed-by: Robert Haas (earlier versions) Reviewed-by: Japin Li Reviewed-by: Justin Pryzby Reviewed-by: Tomas Vondra Reviewed-by: Zhihong Yu Discussion: https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com Discussion: https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com Discussion: https://postgr.es/m/20201231134736.GA25392@alvherre.pgsql --- doc/src/sgml/libpq.sgml | 8 +- doc/src/sgml/mvcc.sgml | 34 +- doc/src/sgml/plpgsql.sgml | 5 +- doc/src/sgml/ref/allfiles.sgml | 1 + doc/src/sgml/ref/create_policy.sgml | 23 +- doc/src/sgml/ref/insert.sgml | 11 +- doc/src/sgml/ref/merge.sgml | 620 ++++++++++++++++++++++++++++++++++++ doc/src/sgml/reference.sgml | 1 + doc/src/sgml/trigger.sgml | 22 ++ 9 files changed, 713 insertions(+), 12 deletions(-) create mode 100644 doc/src/sgml/ref/merge.sgml (limited to 'doc/src') diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 3998b1781b9..70233aa872e 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -4125,9 +4125,11 @@ char *PQcmdTuples(PGresult *res); PGresult. This function can only be used following the execution of a SELECT, CREATE TABLE AS, INSERT, UPDATE, DELETE, - MOVE, FETCH, or COPY statement, - or an EXECUTE of a prepared query that contains an - INSERT, UPDATE, or DELETE statement. + MERGE, MOVE, FETCH, + or COPY statement, or an EXECUTE of a + prepared query that contains an INSERT, + UPDATE, DELETE, + or MERGE statement. If the command that generated the PGresult was anything else, returns an empty string. The caller should not free the return value directly. 
It will be freed when diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index b4d1e571705..905460723c5 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -422,6 +422,37 @@ COMMIT; 11, which no longer matches the criteria. + + MERGE allows the user to specify various + combinations of INSERT, UPDATE + or DELETE subcommands. A MERGE + command with both INSERT and UPDATE + subcommands looks similar to INSERT with an + ON CONFLICT DO UPDATE clause but does not + guarantee that either INSERT or + UPDATE will occur. + If MERGE attempts an UPDATE or + DELETE and the row is concurrently updated but + the join condition still passes for the current target and the + current source tuple, then MERGE will behave + the same as the UPDATE or + DELETE commands and perform its action on the + updated version of the row. However, because MERGE + can specify several actions and they can be conditional, the + conditions for each action are re-evaluated on the updated version of + the row, starting from the first action, even if the action that had + originally matched appears later in the list of actions. + On the other hand, if the row is concurrently updated or deleted so + that the join condition fails, then MERGE will + evaluate the condition's NOT MATCHED actions next, + and execute the first one that succeeds. + If MERGE attempts an INSERT + and a unique index is present and a duplicate row is concurrently + inserted, then a uniqueness violation is raised. + MERGE does not attempt to avoid the + error by executing an UPDATE. + + Because Read Committed mode starts each command with a new snapshot that includes all transactions committed up to that instant, @@ -924,7 +955,8 @@ ERROR: could not serialize access due to read/write dependencies among transact The commands UPDATE, - DELETE, and INSERT + DELETE, INSERT, and + MERGE acquire this lock mode on the target table (in addition to ACCESS SHARE locks on any other referenced tables). 
In general, this lock mode will be acquired by any diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml index e5c1356d8c5..7ebc6593f10 100644 --- a/doc/src/sgml/plpgsql.sgml +++ b/doc/src/sgml/plpgsql.sgml @@ -1388,7 +1388,7 @@ EXECUTE format('SELECT count(*) FROM %I ' Another restriction on parameter symbols is that they only work in optimizable SQL commands (SELECT, INSERT, UPDATE, - DELETE, and certain commands containing one of these). + DELETE, MERGE, and certain commands containing one of these). In other statement types (generically called utility statements), you must insert values textually even if they are just data values. @@ -1666,7 +1666,8 @@ GET DIAGNOSTICS integer_var = ROW_COUNT; - UPDATE, INSERT, and DELETE + UPDATE, INSERT, DELETE, + and MERGE statements set FOUND true if at least one row is affected, false if no row is affected. diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index d67270ccc35..e90a0e1f837 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -158,6 +158,7 @@ Complete list of usable sgml source files in this directory. + diff --git a/doc/src/sgml/ref/create_policy.sgml b/doc/src/sgml/ref/create_policy.sgml index f898b7a2185..e76c342d3da 100644 --- a/doc/src/sgml/ref/create_policy.sgml +++ b/doc/src/sgml/ref/create_policy.sgml @@ -55,7 +55,8 @@ CREATE POLICY name ON - For INSERT and UPDATE statements, + For INSERT, UPDATE, and + MERGE statements, WITH CHECK expressions are enforced after BEFORE triggers are fired, and before any actual data modifications are made. Thus a BEFORE ROW trigger may @@ -281,7 +282,9 @@ CREATE POLICY name ON Using INSERT for a policy means that it will apply - to INSERT commands. Rows being inserted that do + to INSERT commands and MERGE + commands that contain INSERT actions. + Rows being inserted that do not pass this policy will result in a policy violation error, and the entire INSERT command will be aborted. 
An INSERT policy cannot have @@ -305,7 +308,9 @@ CREATE POLICY name ON UPDATE, SELECT FOR UPDATE and SELECT FOR SHARE commands, as well as auxiliary ON CONFLICT DO UPDATE clauses of - INSERT commands. Since UPDATE + INSERT commands. + MERGE commands containing UPDATE + actions are affected as well. Since UPDATE involves pulling an existing record and replacing it with a new modified record, UPDATE policies accept both a USING expression and @@ -435,7 +440,7 @@ CREATE POLICY name ON - INSERT + INSERT / MERGE ... THEN INSERT New row @@ -459,7 +464,7 @@ CREATE POLICY name ON - UPDATE + UPDATE / MERGE ... THEN UPDATE Existing & new rows @@ -613,6 +618,14 @@ AND (see CREATE VIEW). + + No separate policy exists for MERGE. Instead, the policies + defined for SELECT, INSERT, + UPDATE, and DELETE are applied + while executing MERGE, depending on the actions that are + performed. + + Additional discussion and practical examples can be found in . diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml index 2973b72b815..a9af9959c08 100644 --- a/doc/src/sgml/ref/insert.sgml +++ b/doc/src/sgml/ref/insert.sgml @@ -589,6 +589,13 @@ INSERT oid count + + + You may also wish to consider using MERGE, since that + allows mixing INSERT, UPDATE, and + DELETE within a single statement. + See MERGE. + @@ -759,7 +766,9 @@ INSERT INTO distributors (did, dname) VALUES (10, 'Conrad International') Also, the case in which a column name list is omitted, but not all the columns are filled from the VALUES clause or query, - is disallowed by the standard. + is disallowed by the standard. If you prefer a more SQL standard + conforming statement than ON CONFLICT, see + MERGE. 
diff --git a/doc/src/sgml/ref/merge.sgml b/doc/src/sgml/ref/merge.sgml new file mode 100644 index 00000000000..c547122c9bb --- /dev/null +++ b/doc/src/sgml/ref/merge.sgml @@ -0,0 +1,620 @@ + + + + + + MERGE + 7 + SQL - Language Statements + + + + MERGE + conditionally insert, update, or delete rows of a table + + + + +[ WITH with_query [, ...] ] +MERGE INTO target_table_name [ [ AS ] target_alias ] +USING data_source ON join_condition +when_clause [...] + +where data_source is + +{ source_table_name | ( source_query ) } [ [ AS ] source_alias ] + +and when_clause is + +{ WHEN MATCHED [ AND condition ] THEN { merge_update | merge_delete | DO NOTHING } | + WHEN NOT MATCHED [ AND condition ] THEN { merge_insert | DO NOTHING } } + +and merge_insert is + +INSERT [( column_name [, ...] )] +[ OVERRIDING { SYSTEM | USER } VALUE ] +{ VALUES ( { expression | DEFAULT } [, ...] ) | DEFAULT VALUES } + +and merge_update is + +UPDATE SET { column_name = { expression | DEFAULT } | + ( column_name [, ...] ) = ( { expression | DEFAULT } [, ...] ) } [, ...] + +and merge_delete is + +DELETE + + + + + Description + + + MERGE performs actions that modify rows in the + target_table_name, + using the data_source. + MERGE provides a single SQL + statement that can conditionally INSERT, + UPDATE or DELETE rows, a task + that would otherwise require multiple procedural language statements. + + + + First, the MERGE command performs a join + from data_source to + target_table_name + producing zero or more candidate change rows. For each candidate change + row, the status of MATCHED or NOT MATCHED + is set just once, after which WHEN clauses are evaluated + in the order specified. For each candidate change row, the first clause to + evaluate as true is executed. No more than one WHEN + clause is executed for any candidate change row. + + + + MERGE actions have the same effect as + regular UPDATE, INSERT, or + DELETE commands of the same names. 
The syntax of + those commands is different, notably that there is no WHERE + clause and no table name is specified. All actions refer to the + target_table_name, + though modifications to other tables may be made using triggers. + + + + When DO NOTHING is specified, the source row is + skipped. Since actions are evaluated in their specified order, DO + NOTHING can be handy to skip non-interesting source rows before + more fine-grained handling. + + + + There is no separate MERGE privilege. + If you specify an update action, you must have the + UPDATE privilege on the column(s) + of the target_table_name + that are referred to in the SET clause. + If you specify an insert action, you must have the INSERT + privilege on the target_table_name. + If you specify a delete action, you must have the DELETE + privilege on the target_table_name. + Privileges are tested once at statement start and are checked + whether or not particular WHEN clauses are executed. + You will require the SELECT privilege on the + data_source and any column(s) + of the target_table_name + referred to in a condition. + + + + MERGE is not supported if the + target_table_name is a + materialized view, foreign table, or if it has any + rules defined on it. + + + + + Parameters + + + + target_table_name + + + The name (optionally schema-qualified) of the target table to merge into. + + + + + + target_alias + + + A substitute name for the target table. When an alias is + provided, it completely hides the actual name of the table. For + example, given MERGE INTO foo AS f, the remainder of the + MERGE statement must refer to this table as + f not foo. + + + + + + source_table_name + + + The name (optionally schema-qualified) of the source table, view, or + transition table. + + + + + + source_query + + + A query (SELECT statement or VALUES + statement) that supplies the rows to be merged into the + target_table_name. + Refer to the SELECT + statement or VALUES + statement for a description of the syntax. 
+ + + + + + source_alias + + + A substitute name for the data source. When an alias is + provided, it completely hides the actual name of the table or the fact + that a query was issued. + + + + + + join_condition + + + join_condition is + an expression resulting in a value of type + boolean (similar to a WHERE + clause) that specifies which rows in the + data_source + match rows in the + target_table_name. + + + + Only columns from target_table_name + that attempt to match data_source + rows should appear in join_condition. + join_condition subexpressions that + only reference target_table_name + columns can affect which action is taken, often in surprising ways. + + + + + + + when_clause + + + At least one WHEN clause is required. + + + If the WHEN clause specifies WHEN MATCHED + and the candidate change row matches a row in the + target_table_name, + the WHEN clause is executed if the + condition is + absent or it evaluates to true. + + + Conversely, if the WHEN clause specifies + WHEN NOT MATCHED + and the candidate change row does not match a row in the + target_table_name, + the WHEN clause is executed if the + condition is + absent or it evaluates to true. + + + + + + condition + + + An expression that returns a value of type boolean. + If this expression for a WHEN clause + returns true, then the action for that clause + is executed for that row. + + + A condition on a WHEN MATCHED clause can refer to columns + in both the source and the target relations. A condition on a + WHEN NOT MATCHED clause can only refer to columns from + the source relation, since by definition there is no matching target row. + Only the system attributes from the target table are accessible. + + + + + + merge_insert + + + The specification of an INSERT action that inserts + one row into the target table. + The target column names can be listed in any order. If no list of + column names is given at all, the default is all the columns of the + table in their declared order. 
+ + + Each column not present in the explicit or implicit column list will be + filled with a default value, either its declared default value + or null if there is none. + + + If the expression for any column is not of the correct data type, + automatic type conversion will be attempted. + + + If target_table_name + is a partitioned table, each row is routed to the appropriate partition + and inserted into it. + If target_table_name + is a partition, an error will occur if any input row violates the + partition constraint. + + + Column names may not be specified more than once. + INSERT actions cannot contain sub-selects. + + + Only one VALUES clause can be specified. + The VALUES clause can only refer to columns from + the source relation, since by definition there is no matching target row. + + + + + + merge_update + + + The specification of an UPDATE action that updates + the current row of the target_table_name. + Column names may not be specified more than once. + + + Neither a table name nor a WHERE clause is allowed. + + + + + + merge_delete + + + Specifies a DELETE action that deletes the current row + of the target_table_name. + Do not include the table name or any other clauses, as you would normally + do with a DELETE command. + + + + + + column_name + + + The name of a column in the target_table_name. The column name + can be qualified with a subfield name or array subscript, if + needed. (Inserting into only some fields of a composite + column leaves the other fields null.) + Do not include the table's name in the specification + of a target column. + + + + + + OVERRIDING SYSTEM VALUE + + + Without this clause, it is an error to specify an explicit value + (other than DEFAULT) for an identity column defined + as GENERATED ALWAYS. This clause overrides that + restriction. 
+ + + + + + OVERRIDING USER VALUE + + + If this clause is specified, then any values supplied for identity + columns defined as GENERATED BY DEFAULT are ignored + and the default sequence-generated values are applied. + + + + + + DEFAULT VALUES + + + All columns will be filled with their default values. + (An OVERRIDING clause is not permitted in this + form.) + + + + + + expression + + + An expression to assign to the column. If used in a + WHEN MATCHED clause, the expression can use values + from the original row in the target table, and values from the + data_source row. + If used in a WHEN NOT MATCHED clause, the + expression can use values from the data_source. + + + + + + DEFAULT + + + Set the column to its default value (which will be NULL + if no specific default expression has been assigned to it). + + + + + + with_query + + + The WITH clause allows you to specify one or more + subqueries that can be referenced by name in the MERGE + query. See and + for details. + + + + + + + + + Outputs + + + On successful completion, a MERGE command returns a command + tag of the form + +MERGE total_count + + The total_count is the total + number of rows changed (whether inserted, updated, or deleted). + If total_count is 0, no rows + were changed in any way. + + + + + + Notes + + + The following steps take place during the execution of + MERGE. + + + + Perform any BEFORE STATEMENT triggers for all + actions specified, whether or not their WHEN + clauses match. + + + + + Perform a join from source to target table. + The resulting query will be optimized normally and will produce + a set of candidate change rows. For each candidate change row, + + + + Evaluate whether each row is MATCHED or + NOT MATCHED. + + + + + Test each WHEN condition in the order + specified until one returns true. + + + + + When a condition returns true, perform the following actions: + + + + Perform any BEFORE ROW triggers that fire + for the action's event type. 
+ + + + + Perform the specified action, invoking any check constraints on the + target table. + + + + + Perform any AFTER ROW triggers that fire for + the action's event type. + + + + + + + + + + + Perform any AFTER STATEMENT triggers for actions + specified, whether or not they actually occur. This is similar to the + behavior of an UPDATE statement that modifies no rows. + + + + In summary, statement triggers for an event type (say, + INSERT) will be fired whenever we + specify an action of that kind. + In contrast, row-level triggers will fire only for the specific event type + being executed. + So a MERGE command might fire statement triggers for both + UPDATE and INSERT, even though only + UPDATE row triggers were fired. + + + + You should ensure that the join produces at most one candidate change row + for each target row. In other words, a target row shouldn't join to more + than one data source row. If it does, then only one of the candidate change + rows will be used to modify the target row; later attempts to modify the + row will cause an error. + This can also occur if row triggers make changes to the target table + and the rows so modified are then subsequently also modified by + MERGE. + If the repeated action is an INSERT, this will + cause a uniqueness violation, while a repeated UPDATE + or DELETE will cause a cardinality violation; the + latter behavior is required by the SQL standard. + This differs from historical PostgreSQL + behavior of joins in UPDATE and + DELETE statements where second and subsequent + attempts to modify the same row are simply ignored. + + + + If a WHEN clause omits an AND + sub-clause, it becomes the final reachable clause of that + kind (MATCHED or NOT MATCHED). + If a later WHEN clause of that kind + is specified it would be provably unreachable and an error is raised. + If no final reachable clause is specified of either kind, it is + possible that no action will be taken for a candidate change row. 
+ + + + The order in which rows are generated from the data source is + indeterminate by default. + A source_query can be + used to specify a consistent ordering, if required, which might be + needed to avoid deadlocks between concurrent transactions. + + + + There is no RETURNING clause with + MERGE. Actions of INSERT, + UPDATE and DELETE cannot contain + RETURNING or WITH clauses. + + + + You may also wish to consider using INSERT ... ON CONFLICT + as an alternative statement which offers the ability to run an + UPDATE if a concurrent INSERT + occurs. There are a variety of differences and restrictions between + the two statement types and they are not interchangeable. + + + + + Examples + + + Perform maintenance on CustomerAccounts based + upon new Transactions. + + +MERGE INTO CustomerAccount CA +USING RecentTransactions T +ON T.CustomerId = CA.CustomerId +WHEN MATCHED THEN + UPDATE SET Balance = Balance + TransactionValue +WHEN NOT MATCHED THEN + INSERT (CustomerId, Balance) + VALUES (T.CustomerId, T.TransactionValue); + + + + + Notice that this would be exactly equivalent to the following + statement because the MATCHED result does not change + during execution. + + +MERGE INTO CustomerAccount CA +USING (Select CustomerId, TransactionValue From RecentTransactions) AS T +ON CA.CustomerId = T.CustomerId +WHEN NOT MATCHED THEN + INSERT (CustomerId, Balance) + VALUES (T.CustomerId, T.TransactionValue) +WHEN MATCHED THEN + UPDATE SET Balance = Balance + TransactionValue; + + + + + Attempt to insert a new stock item along with the quantity of stock. If + the item already exists, instead update the stock count of the existing + item. Don't allow entries that have zero stock. 
+ +MERGE INTO wines w +USING wine_stock_changes s +ON s.winename = w.winename +WHEN NOT MATCHED AND s.stock_delta > 0 THEN + INSERT VALUES(s.winename, s.stock_delta) +WHEN MATCHED AND w.stock + s.stock_delta > 0 THEN + UPDATE SET stock = w.stock + s.stock_delta +WHEN MATCHED THEN + DELETE; + + + The wine_stock_changes table might be, for example, a + temporary table recently loaded into the database. + + + + + + Compatibility + + This command conforms to the SQL standard. + + + The WITH clause and DO NOTHING action are extensions to + the SQL standard. + + + diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index da421ff24e2..a3b743e8c1e 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -186,6 +186,7 @@ &listen; &load; &lock; + &merge; &move; &notify; &prepare; diff --git a/doc/src/sgml/trigger.sgml b/doc/src/sgml/trigger.sgml index 7e2654493bb..04e702a7956 100644 --- a/doc/src/sgml/trigger.sgml +++ b/doc/src/sgml/trigger.sgml @@ -192,6 +192,28 @@ will be fired. + + No separate triggers are defined for MERGE. Instead, + statement-level or row-level UPDATE, + DELETE, and INSERT triggers are fired + depending on (for statement-level triggers) what actions are specified in + the MERGE query and (for row-level triggers) what + actions are performed. + + + + While running a MERGE command, statement-level + BEFORE and AFTER triggers are + fired for events specified in the actions of the MERGE + command, irrespective of whether or not the action is ultimately performed. + This is the same as an UPDATE statement that updates + no rows, yet statement-level triggers are fired. + The row-level triggers are fired only when a row is actually updated, + inserted or deleted. So it's perfectly legal that while statement-level + triggers are fired for certain types of action, no row-level triggers + are fired for the same kind of action. + + Trigger functions invoked by per-statement triggers should always return NULL. 
Trigger functions invoked by per-row -- cgit v1.2.3