@@ -44,7 +44,7 @@
  *
  * General copy partition implementation:
  * - Disable subscription on destination, otherwise we can't drop repl slot on
-   source.
+ *   source.
  * - Idempotently create publication and repl slot on source.
  * - Idempotently create table and async subscription on destination.
  *   We use async subscription, because sync would block the table while copy is
@@ -65,6 +65,47 @@
 * etc), we have to start everything from scratch if the master reboots. This
 * is arguably fine.
 *
+ * Short description of all tasks:
+ * move_primary:
+ *   copy part, update metadata.
+ *   On metadata update:
+ *     on src node, drop lr copy stuff, create a foreign table and replace
+ *     the table with it, then drop the table. Drop primary lr stuff.
+ *     on dst node, replace the foreign table with the fresh copy (lock it
+ *     until sync_standby_names is updated?), drop the former and drop lr
+ *     copy stuff. Create primary lr stuff (including sync_standby_names).
+ *     on the node with a replica (if one exists), alter sub and alter fdw
+ *     server.
+ *     on others, alter fdw server.
+ *
+ * About fdws on replicas: we have to keep the partition of the parent table
+ * as an fdw, because otherwise we would not be able to write anything to it.
+ * On the other hand, keeping the whole list of replicas is a bit excessive
+ * and slower in case of primary failure: actually we need only the primary
+ * and ourselves.
+ *
+ * add_replica:
+ *   copy part from the last replica (because only the last replica knows
+ *   when it has created the sync lr channel and can make the table writable
+ *   again). Make the dst table read-only for the non-replica role, update
+ *   metadata.
+ *   On metadata update:
+ *     on the (old) last replica, alter the cp lr channel to make it sync
+ *     (and rename it), make the table writable.
+ *     on the node with the fresh replica, rename the lr channel, alter fdw
+ *     server.
+ *     on others, alter fdw server.
+ *
+ * move_replica:
+ *   copy part. Make the dst table read-only for the non-replica role,
+ *   update metadata.
+ *   On metadata update:
+ *     on src, drop lr copy stuff, alter fdw server. Drop lr pub (if any)
+ *     and sub stuff. Drop the table.
+ *     on dst, drop lr copy stuff, create lr pub & sync sub, alter fdw server.
+ *     on the previous part node, alter the lr channel.
+ *     on the following part node (if any), recreate the sub.
+ *
 * -------------------------------------------------------------------------
 */
 #include "postgres.h"
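
To make the copy sequence from the header comment concrete, here is a minimal sketch in the file's style. exec_sql_on() and its SRC/DST designators are hypothetical helpers invented for illustration; psprintf() and pg_create_logical_replication_slot() are the real PostgreSQL APIs the steps map onto, and the actual code builds equivalent commands in init_cp_state() and drives them through a task state machine.

#include "postgres.h"

typedef enum { SRC, DST } NodeSide;
extern void exec_sql_on(NodeSide side, char *sql);	/* hypothetical helper */

static void
copy_part_sketch(const char *part, const char *logname, const char *src_connstr)
{
	/* Disable subscription on dst, so the repl slot can be dropped on src. */
	exec_sql_on(DST, psprintf("alter subscription %s disable;", logname));

	/* Create publication and repl slot on src; real code must additionally
	 * guard against an already existing slot to stay idempotent. */
	exec_sql_on(SRC, psprintf(
		"drop publication if exists %s;"
		"create publication %s for table %s;"
		"select pg_create_logical_replication_slot('%s', 'pgoutput');",
		logname, logname, part, logname));

	/* Idempotently create the async subscription on dst; a sync
	 * subscription would block the table while the copy is in progress.
	 * Table creation itself is omitted here. */
	exec_sql_on(DST, psprintf(
		"drop subscription if exists %s;"
		"create subscription %s connection '%s' publication %s"
		" with (create_slot = false, slot_name = '%s');",
		logname, logname, src_connstr, logname, logname));
}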
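The move_primary choreography above can likewise be read as a dispatch on the node's role once metadata is updated. A sketch follows, with the role enum and function invented for illustration and the per-role actions reduced to comments; none of these names exist in the file.

/* Illustrative only: restates the move_primary "on metadata update"
 * bullet list as a switch over the node's role. */
typedef enum { MP_SRC, MP_DST, MP_REPLICA_HOLDER, MP_OTHER } MovePrimaryRole;

static void
on_move_primary_meta_update(MovePrimaryRole role)
{
	switch (role)
	{
		case MP_SRC:			/* old owner: drop lr copy stuff, swap the
								 * table for a foreign table, drop the table
								 * and the primary lr stuff */
			break;
		case MP_DST:			/* new owner: swap the foreign table for the
								 * fresh copy, drop lr copy stuff, create
								 * primary lr stuff (incl. sync_standby_names) */
			break;
		case MP_REPLICA_HOLDER:	/* alter sub and alter fdw server */
			break;
		case MP_OTHER:			/* alter fdw server only */
			break;
	}
}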
@@ -334,7 +372,7 @@ move_primary(Cmd *cmd)
 
 /*
  * Fill CopyPartState, retrieving needed data. If something goes wrong, we
- * don't bother to fill the rest of fields.
+ * don't bother to fill the rest of the fields and mark the task as failed.
 */
 void
 init_cp_state(CopyPartState *cps, const char *part_name, int32 dst_node)
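
The convention the rewritten comment describes looks roughly like this: each retrieval step checks its result and, on failure, marks the task failed and returns early. CopyPartState, TASK_FAILED and shmn_elog are from the file; the lookup helper and message text are illustrative.

extern int32 lookup_primary_owner(const char *part_name);	/* hypothetical */

void
init_cp_state_sketch(CopyPartState *cps, const char *part_name, int32 dst_node)
{
	cps->part_name = part_name;
	cps->dst_node = dst_node;
	if ((cps->src_node = lookup_primary_owner(part_name)) == -1)
	{
		shmn_elog(WARNING, "partition %s not found, not copying it", part_name);
		cps->res = TASK_FAILED;
		return;				/* don't bother to fill the rest of the fields */
	}
	/* ... every subsequent retrieval is checked the same way ... */
}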
@@ -390,6 +428,7 @@ init_cp_state(CopyPartState *cps, const char *part_name, int32 dst_node)
 		cps->logname, cps->logname, cps->part_name, cps->logname
 		);
 	cps->relation = get_partition_relation(part_name);
+	Assert(cps->relation != NULL);
 	cps->dst_create_tab_and_sub_sql = psprintf(
 		"drop table if exists %s cascade;"
 		/*
@@ -474,6 +513,7 @@ exec_tasks(CopyPartState **tasks, int ntasks)
 		shmn_elog(FATAL, "clock_gettime failed, %s", strerror(e));
 	for (i = 0; i < ntasks; i++)
 	{
+		/* TODO: make sure one part is touched only by one task */
 		if (tasks[i]->res != TASK_FAILED)
 		{
 			CopyPartStateNode *cps_node = palloc(sizeof(CopyPartStateNode));
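
One simple way to discharge the new TODO, assuming part_name uniquely identifies a partition (which matches how init_cp_state() is keyed): reject task lists that touch the same partition twice before entering the loop.

/* Sketch for the TODO above; relies only on the part_name field that is
 * visible elsewhere in this file.  Quadratic, but ntasks is small. */
static bool
tasks_touch_distinct_parts(CopyPartState **tasks, int ntasks)
{
	int	i, j;

	for (i = 0; i < ntasks; i++)
		for (j = i + 1; j < ntasks; j++)
			if (strcmp(tasks[i]->part_name, tasks[j]->part_name) == 0)
				return false;	/* same partition in two tasks */
	return true;
}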