Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 34c70c7

Browse files
committed
Instrument checkpoint sync calls.
Greg Smith, reviewed by Jeff Janes
1 parent 9878e29 commit 34c70c7

File tree

3 files changed

+79
-6
lines changed

3 files changed

+79
-6
lines changed

src/backend/access/transam/xlog.c

+35-6
Original file line numberDiff line numberDiff line change
@@ -6953,10 +6953,15 @@ LogCheckpointEnd(bool restartpoint)
69536953
{
69546954
long write_secs,
69556955
sync_secs,
6956-
total_secs;
6956+
total_secs,
6957+
longest_secs,
6958+
average_secs;
69576959
int write_usecs,
69586960
sync_usecs,
6959-
total_usecs;
6961+
total_usecs,
6962+
longest_usecs,
6963+
average_usecs;
6964+
uint64 average_sync_time;
69606965

69616966
CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
69626967

@@ -6972,26 +6977,50 @@ LogCheckpointEnd(bool restartpoint)
69726977
CheckpointStats.ckpt_sync_end_t,
69736978
&sync_secs, &sync_usecs);
69746979

6980+
/*
6981+
* Timing values returned from CheckpointStats are in microseconds.
6982+
* Convert to the second plus microsecond form that TimestampDifference
6983+
* returns for homogeneous printing.
6984+
*/
6985+
longest_secs = (long) (CheckpointStats.ckpt_longest_sync / 1000000);
6986+
longest_usecs = CheckpointStats.ckpt_longest_sync -
6987+
(uint64) longest_secs * 1000000;
6988+
6989+
average_sync_time = 0;
6990+
if (CheckpointStats.ckpt_sync_rels > 0)
6991+
average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6992+
CheckpointStats.ckpt_sync_rels;
6993+
average_secs = (long) (average_sync_time / 1000000);
6994+
average_usecs = average_sync_time - (uint64) average_secs * 1000000;
6995+
69756996
if (restartpoint)
69766997
elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); "
6977-
"write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
6998+
"write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
6999+
"sync files=%d, longest=%ld.%03d s, average=%ld.%03d s",
69787000
CheckpointStats.ckpt_bufs_written,
69797001
(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
69807002
write_secs, write_usecs / 1000,
69817003
sync_secs, sync_usecs / 1000,
6982-
total_secs, total_usecs / 1000);
7004+
total_secs, total_usecs / 1000,
7005+
CheckpointStats.ckpt_sync_rels,
7006+
longest_secs, longest_usecs / 1000,
7007+
average_secs, average_usecs / 1000);
69837008
else
69847009
elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
69857010
"%d transaction log file(s) added, %d removed, %d recycled; "
6986-
"write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
7011+
"write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
7012+
"sync files=%d, longest=%ld.%03d s, average=%ld.%03d s",
69877013
CheckpointStats.ckpt_bufs_written,
69887014
(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
69897015
CheckpointStats.ckpt_segs_added,
69907016
CheckpointStats.ckpt_segs_removed,
69917017
CheckpointStats.ckpt_segs_recycled,
69927018
write_secs, write_usecs / 1000,
69937019
sync_secs, sync_usecs / 1000,
6994-
total_secs, total_usecs / 1000);
7020+
total_secs, total_usecs / 1000,
7021+
CheckpointStats.ckpt_sync_rels,
7022+
longest_secs, longest_usecs / 1000,
7023+
average_secs, average_usecs / 1000);
69957024
}
69967025

69977026
/*

src/backend/storage/smgr/md.c

+37
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include "catalog/catalog.h"
2222
#include "miscadmin.h"
23+
#include "portability/instr_time.h"
2324
#include "postmaster/bgwriter.h"
2425
#include "storage/fd.h"
2526
#include "storage/bufmgr.h"
@@ -927,6 +928,15 @@ mdsync(void)
927928
PendingOperationEntry *entry;
928929
int absorb_counter;
929930

931+
/* Statistics on sync times */
932+
int processed = 0;
933+
instr_time sync_start,
934+
sync_end,
935+
sync_diff;
936+
uint64 elapsed;
937+
uint64 longest = 0;
938+
uint64 total_elapsed = 0;
939+
930940
/*
931941
* This is only called during checkpoints, and checkpoints should only
932942
* occur in processes that have created a pendingOpsTable.
@@ -1069,9 +1079,31 @@ mdsync(void)
10691079
seg = _mdfd_getseg(reln, entry->tag.forknum,
10701080
entry->tag.segno * ((BlockNumber) RELSEG_SIZE),
10711081
false, EXTENSION_RETURN_NULL);
1082+
1083+
if (log_checkpoints)
1084+
INSTR_TIME_SET_CURRENT(sync_start);
1085+
else
1086+
INSTR_TIME_SET_ZERO(sync_start);
1087+
10721088
if (seg != NULL &&
10731089
FileSync(seg->mdfd_vfd) >= 0)
1090+
{
1091+
if (log_checkpoints && (! INSTR_TIME_IS_ZERO(sync_start)))
1092+
{
1093+
INSTR_TIME_SET_CURRENT(sync_end);
1094+
sync_diff = sync_end;
1095+
INSTR_TIME_SUBTRACT(sync_diff, sync_start);
1096+
elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
1097+
if (elapsed > longest)
1098+
longest = elapsed;
1099+
total_elapsed += elapsed;
1100+
processed++;
1101+
elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
1102+
processed, FilePathName(seg->mdfd_vfd), (double) elapsed / 1000);
1103+
}
1104+
10741105
break; /* success; break out of retry loop */
1106+
}
10751107

10761108
/*
10771109
* XXX is there any point in allowing more than one retry?
@@ -1113,6 +1145,11 @@ mdsync(void)
11131145
elog(ERROR, "pendingOpsTable corrupted");
11141146
} /* end loop over hashtable entries */
11151147

1148+
/* Return sync performance metrics for report at checkpoint end */
1149+
CheckpointStats.ckpt_sync_rels = processed;
1150+
CheckpointStats.ckpt_longest_sync = longest;
1151+
CheckpointStats.ckpt_agg_sync_time = total_elapsed;
1152+
11161153
/* Flag successful completion of mdsync */
11171154
mdsync_in_progress = false;
11181155
}

src/include/access/xlog.h

+7
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,13 @@ typedef struct CheckpointStatsData
257257
int ckpt_segs_added; /* # of new xlog segments created */
258258
int ckpt_segs_removed; /* # of xlog segments deleted */
259259
int ckpt_segs_recycled; /* # of xlog segments recycled */
260+
261+
int ckpt_sync_rels; /* # of relations synced */
262+
uint64 ckpt_longest_sync; /* Longest sync for one relation */
263+
uint64 ckpt_agg_sync_time; /* The sum of all the individual sync
264+
* times, which is not necessarily the
265+
* same as the total elapsed time for
266+
* the entire sync phase. */
260267
} CheckpointStatsData;
261268

262269
extern CheckpointStatsData CheckpointStats;

0 commit comments

Comments
 (0)