20
20
* the desired number of worker processes, which each enter WaitForCommands().
21
21
*
22
22
* The master process dispatches an individual work item to one of the worker
23
- * processes in DispatchJobForTocEntry(). That calls
24
- * AH->MasterStartParallelItemPtr, a routine of the output format. This
25
- * function's arguments are the parents archive handle AH (containing the full
26
- * catalog information), the TocEntry that the worker should work on and a
27
- * T_Action value indicating whether this is a backup or a restore task. The
28
- * function simply converts the TocEntry assignment into a command string that
29
- * is then sent over to the worker process. In the simplest case that would be
30
- * something like "DUMP 1234", with 1234 being the TocEntry id.
31
- *
23
+ * processes in DispatchJobForTocEntry(). We send a command string such as
24
+ * "DUMP 1234" or "RESTORE 1234", where 1234 is the TocEntry ID.
32
25
* The worker process receives and decodes the command and passes it to the
33
26
* routine pointed to by AH->WorkerJobDumpPtr or AH->WorkerJobRestorePtr,
34
27
* which are routines of the current archive format. That routine performs
35
- * the required action (dump or restore) and returns a malloc'd status string.
36
- * The status string is passed back to the master where it is interpreted by
37
- * AH->MasterEndParallelItemPtr, another format-specific routine. That
38
- * function can update format-specific information on the master's side,
39
- * depending on the reply from the worker process. In the end it returns a
40
- * status code, which we pass to the ParallelCompletionPtr callback function
41
- * that was passed to DispatchJobForTocEntry(). The callback function does
42
- * state updating for the master control logic in pg_backup_archiver.c.
28
+ * the required action (dump or restore) and returns an integer status code.
29
+ * This is passed back to the master where we pass it to the
30
+ * ParallelCompletionPtr callback function that was passed to
31
+ * DispatchJobForTocEntry(). The callback function does state updating
32
+ * for the master control logic in pg_backup_archiver.c.
43
33
*
44
- * Remember that we have forked off the workers only after we have read in
45
- * the catalog. That's why our worker processes can also access the catalog
46
- * information. (In the Windows case, the workers are threads in the same
47
- * process. To avoid problems, they work with cloned copies of the Archive
48
- * data structure; see RunWorker().)
34
+ * In principle additional archive-format-specific information might be needed
35
+ * in commands or worker status responses, but so far that hasn't proved
36
+ * necessary, since workers have full copies of the ArchiveHandle/TocEntry
37
+ * data structures. Remember that we have forked off the workers only after
38
+ * we have read in the catalog. That's why our worker processes can also
39
+ * access the catalog information. (In the Windows case, the workers are
40
+ * threads in the same process. To avoid problems, they work with cloned
41
+ * copies of the Archive data structure; see RunWorker().)
49
42
*
50
43
* In the master process, the workerStatus field for each worker has one of
51
44
* the following values:
@@ -1073,6 +1066,110 @@ ParallelBackupEnd(ArchiveHandle *AH, ParallelState *pstate)
1073
1066
free (pstate );
1074
1067
}
1075
1068
1069
+ /*
1070
+ * These next four functions handle construction and parsing of the command
1071
+ * strings and response strings for parallel workers.
1072
+ *
1073
+ * Currently, these can be the same regardless of which archive format we are
1074
+ * processing. In future, we might want to let format modules override these
1075
+ * functions to add format-specific data to a command or response.
1076
+ */
1077
+
1078
+ /*
1079
+ * buildWorkerCommand: format a command string to send to a worker.
1080
+ *
1081
+ * The string is built in the caller-supplied buffer of size buflen.
1082
+ */
1083
+ static void
1084
+ buildWorkerCommand (ArchiveHandle * AH , TocEntry * te , T_Action act ,
1085
+ char * buf , int buflen )
1086
+ {
1087
+ if (act == ACT_DUMP )
1088
+ snprintf (buf , buflen , "DUMP %d" , te -> dumpId );
1089
+ else if (act == ACT_RESTORE )
1090
+ snprintf (buf , buflen , "RESTORE %d" , te -> dumpId );
1091
+ else
1092
+ Assert (false);
1093
+ }
1094
+
1095
+ /*
1096
+ * parseWorkerCommand: interpret a command string in a worker.
1097
+ */
1098
+ static void
1099
+ parseWorkerCommand (ArchiveHandle * AH , TocEntry * * te , T_Action * act ,
1100
+ const char * msg )
1101
+ {
1102
+ DumpId dumpId ;
1103
+ int nBytes ;
1104
+
1105
+ if (messageStartsWith (msg , "DUMP " ))
1106
+ {
1107
+ * act = ACT_DUMP ;
1108
+ sscanf (msg , "DUMP %d%n" , & dumpId , & nBytes );
1109
+ Assert (nBytes == strlen (msg ));
1110
+ * te = getTocEntryByDumpId (AH , dumpId );
1111
+ Assert (* te != NULL );
1112
+ }
1113
+ else if (messageStartsWith (msg , "RESTORE " ))
1114
+ {
1115
+ * act = ACT_RESTORE ;
1116
+ sscanf (msg , "RESTORE %d%n" , & dumpId , & nBytes );
1117
+ Assert (nBytes == strlen (msg ));
1118
+ * te = getTocEntryByDumpId (AH , dumpId );
1119
+ Assert (* te != NULL );
1120
+ }
1121
+ else
1122
+ exit_horribly (modulename ,
1123
+ "unrecognized command received from master: \"%s\"\n" ,
1124
+ msg );
1125
+ }
1126
+
1127
+ /*
1128
+ * buildWorkerResponse: format a response string to send to the master.
1129
+ *
1130
+ * The string is built in the caller-supplied buffer of size buflen.
1131
+ */
1132
+ static void
1133
+ buildWorkerResponse (ArchiveHandle * AH , TocEntry * te , T_Action act , int status ,
1134
+ char * buf , int buflen )
1135
+ {
1136
+ snprintf (buf , buflen , "OK %d %d %d" ,
1137
+ te -> dumpId ,
1138
+ status ,
1139
+ status == WORKER_IGNORED_ERRORS ? AH -> public .n_errors : 0 );
1140
+ }
1141
+
1142
+ /*
1143
+ * parseWorkerResponse: parse the status message returned by a worker.
1144
+ *
1145
+ * Returns the integer status code, and may update fields of AH and/or te.
1146
+ */
1147
+ static int
1148
+ parseWorkerResponse (ArchiveHandle * AH , TocEntry * te ,
1149
+ const char * msg )
1150
+ {
1151
+ DumpId dumpId ;
1152
+ int nBytes ,
1153
+ n_errors ;
1154
+ int status = 0 ;
1155
+
1156
+ if (messageStartsWith (msg , "OK " ))
1157
+ {
1158
+ sscanf (msg , "OK %d %d %d%n" , & dumpId , & status , & n_errors , & nBytes );
1159
+
1160
+ Assert (dumpId == te -> dumpId );
1161
+ Assert (nBytes == strlen (msg ));
1162
+
1163
+ AH -> public .n_errors += n_errors ;
1164
+ }
1165
+ else
1166
+ exit_horribly (modulename ,
1167
+ "invalid message received from worker: \"%s\"\n" ,
1168
+ msg );
1169
+
1170
+ return status ;
1171
+ }
1172
+
1076
1173
/*
1077
1174
* Dispatch a job to some free worker.
1078
1175
*
@@ -1091,18 +1188,16 @@ DispatchJobForTocEntry(ArchiveHandle *AH,
1091
1188
void * callback_data )
1092
1189
{
1093
1190
int worker ;
1094
- char * arg ;
1191
+ char buf [ 256 ] ;
1095
1192
1096
1193
/* Get a worker, waiting if none are idle */
1097
1194
while ((worker = GetIdleWorker (pstate )) == NO_SLOT )
1098
1195
WaitForWorkers (AH , pstate , WFW_ONE_IDLE );
1099
1196
1100
1197
/* Construct and send command string */
1101
- arg = (AH -> MasterStartParallelItemPtr ) (AH , te , act );
1102
-
1103
- sendMessageToWorker (pstate , worker , arg );
1198
+ buildWorkerCommand (AH , te , act , buf , sizeof (buf ));
1104
1199
1105
- /* XXX aren't we leaking string here? (no, because it's static. Ick.) */
1200
+ sendMessageToWorker ( pstate , worker , buf );
1106
1201
1107
1202
/* Remember worker is busy, and which TocEntry it's working on */
1108
1203
pstate -> parallelSlot [worker ].workerStatus = WRKR_WORKING ;
@@ -1220,10 +1315,10 @@ static void
1220
1315
WaitForCommands (ArchiveHandle * AH , int pipefd [2 ])
1221
1316
{
1222
1317
char * command ;
1223
- DumpId dumpId ;
1224
- int nBytes ;
1225
- char * str ;
1226
1318
TocEntry * te ;
1319
+ T_Action act ;
1320
+ int status = 0 ;
1321
+ char buf [256 ];
1227
1322
1228
1323
for (;;)
1229
1324
{
@@ -1233,47 +1328,29 @@ WaitForCommands(ArchiveHandle *AH, int pipefd[2])
1233
1328
return ;
1234
1329
}
1235
1330
1236
- if (messageStartsWith (command , "DUMP " ))
1237
- {
1238
- /* Decode the command */
1239
- sscanf (command + strlen ("DUMP " ), "%d%n" , & dumpId , & nBytes );
1240
- Assert (nBytes == strlen (command ) - strlen ("DUMP " ));
1241
- te = getTocEntryByDumpId (AH , dumpId );
1242
- Assert (te != NULL );
1331
+ /* Decode the command */
1332
+ parseWorkerCommand (AH , & te , & act , command );
1243
1333
1334
+ if (act == ACT_DUMP )
1335
+ {
1244
1336
/* Acquire lock on this table within the worker's session */
1245
1337
lockTableForWorker (AH , te );
1246
1338
1247
1339
/* Perform the dump command */
1248
- str = (AH -> WorkerJobDumpPtr ) (AH , te );
1249
-
1250
- /* Return status to master */
1251
- sendMessageToMaster (pipefd , str );
1252
-
1253
- /* we are responsible for freeing the status string */
1254
- free (str );
1340
+ status = (AH -> WorkerJobDumpPtr ) (AH , te );
1255
1341
}
1256
- else if (messageStartsWith ( command , "RESTORE " ) )
1342
+ else if (act == ACT_RESTORE )
1257
1343
{
1258
- /* Decode the command */
1259
- sscanf (command + strlen ("RESTORE " ), "%d%n" , & dumpId , & nBytes );
1260
- Assert (nBytes == strlen (command ) - strlen ("RESTORE " ));
1261
- te = getTocEntryByDumpId (AH , dumpId );
1262
- Assert (te != NULL );
1263
-
1264
1344
/* Perform the restore command */
1265
- str = (AH -> WorkerJobRestorePtr ) (AH , te );
1266
-
1267
- /* Return status to master */
1268
- sendMessageToMaster (pipefd , str );
1269
-
1270
- /* we are responsible for freeing the status string */
1271
- free (str );
1345
+ status = (AH -> WorkerJobRestorePtr ) (AH , te );
1272
1346
}
1273
1347
else
1274
- exit_horribly (modulename ,
1275
- "unrecognized command received from master: \"%s\"\n" ,
1276
- command );
1348
+ Assert (false);
1349
+
1350
+ /* Return status to master */
1351
+ buildWorkerResponse (AH , te , act , status , buf , sizeof (buf ));
1352
+
1353
+ sendMessageToMaster (pipefd , buf );
1277
1354
1278
1355
/* command was pg_malloc'd and we are responsible for free()ing it. */
1279
1356
free (command );
@@ -1286,9 +1363,9 @@ WaitForCommands(ArchiveHandle *AH, int pipefd[2])
1286
1363
* If do_wait is true, wait to get a status message; otherwise, just return
1287
1364
* immediately if there is none available.
1288
1365
*
1289
- * When we get a status message, we let MasterEndParallelItemPtr process it,
1290
- * then pass the resulting status code to the callback function that was
1291
- * specified to DispatchJobForTocEntry, then reset the worker status to IDLE.
1366
+ * When we get a status message, we pass the status code to the callback
1367
+ * function that was specified to DispatchJobForTocEntry, then reset the
1368
+ * worker status to IDLE.
1292
1369
*
1293
1370
* Returns true if we collected a status message, else false.
1294
1371
*
@@ -1318,29 +1395,10 @@ ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait)
1318
1395
{
1319
1396
ParallelSlot * slot = & pstate -> parallelSlot [worker ];
1320
1397
TocEntry * te = slot -> te ;
1321
- char * statusString ;
1322
1398
int status ;
1323
1399
1324
- if (messageStartsWith (msg , "OK RESTORE " ))
1325
- {
1326
- statusString = msg + strlen ("OK RESTORE " );
1327
- status =
1328
- (AH -> MasterEndParallelItemPtr )
1329
- (AH , te , statusString , ACT_RESTORE );
1330
- slot -> callback (AH , te , status , slot -> callback_data );
1331
- }
1332
- else if (messageStartsWith (msg , "OK DUMP " ))
1333
- {
1334
- statusString = msg + strlen ("OK DUMP " );
1335
- status =
1336
- (AH -> MasterEndParallelItemPtr )
1337
- (AH , te , statusString , ACT_DUMP );
1338
- slot -> callback (AH , te , status , slot -> callback_data );
1339
- }
1340
- else
1341
- exit_horribly (modulename ,
1342
- "invalid message received from worker: \"%s\"\n" ,
1343
- msg );
1400
+ status = parseWorkerResponse (AH , te , msg );
1401
+ slot -> callback (AH , te , status , slot -> callback_data );
1344
1402
slot -> workerStatus = WRKR_IDLE ;
1345
1403
slot -> te = NULL ;
1346
1404
}
@@ -1364,8 +1422,8 @@ ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait)
1364
1422
* WFW_ONE_IDLE: wait for at least one worker to be idle
1365
1423
* WFW_ALL_IDLE: wait for all workers to be idle
1366
1424
*
1367
- * Any received results are passed to MasterEndParallelItemPtr and then
1368
- * to the callback specified to DispatchJobForTocEntry.
1425
+ * Any received results are passed to the callback specified to
1426
+ * DispatchJobForTocEntry.
1369
1427
*
1370
1428
* This function is executed in the master process.
1371
1429
*/
0 commit comments