Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 23bd3ce

Browse files
committed
Attempt to identify system timezone by reading /etc/localtime symlink.
On many modern platforms, /etc/localtime is a symlink to a file within the IANA database. Reading the symlink lets us find out the name of the system timezone directly, without going through the brute-force search embodied in scan_available_timezones(). This shortens the runtime of initdb by some tens of ms, which is helpful for the buildfarm, and it also allows us to reliably select the same zone name the system was actually configured for, rather than possibly choosing one of IANA's many zone aliases. (For example, in a system configured for "Asia/Tokyo", the brute-force search would not choose that name but its alias "Japan", on the grounds of the latter string being shorter. More surprisingly, "Navajo" is preferred to either "America/Denver" or "US/Mountain", as seen in an old complaint from Josh Berkus.)

If /etc/localtime doesn't exist, or isn't a symlink, or we can't make sense of its contents, or the contents match a zone we know but that zone doesn't match the observed behavior of localtime(), fall back to the brute-force search.

Also, tweak initdb so that it prints the zone name it selected.

In passing, replace the last few references to the "Olson" database in code comments with "IANA", as that's been our preferred term since commit b2cbced.

Patch by me, per a suggestion from Robert Haas; review by Michael Paquier

Discussion: https://postgr.es/m/7408.1525812528@sss.pgh.pa.us
1 parent bc153c9 commit 23bd3ce

File tree

3 files changed

+133
-13
lines changed

3 files changed

+133
-13
lines changed

src/bin/initdb/findtimezone.c

+126-10
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#include <fcntl.h>
1616
#include <sys/stat.h>
1717
#include <time.h>
18+
#include <unistd.h>
1819

1920
#include "pgtz.h"
2021

@@ -126,12 +127,19 @@ pg_load_tz(const char *name)
126127
* On most systems, we rely on trying to match the observable behavior of
127128
* the C library's localtime() function. The database zone that matches
128129
* furthest into the past is the one to use. Often there will be several
129-
* zones with identical rankings (since the Olson database assigns multiple
130+
* zones with identical rankings (since the IANA database assigns multiple
130131
* names to many zones). We break ties arbitrarily by preferring shorter,
131132
* then alphabetically earlier zone names.
132133
*
134+
* Many modern systems use the IANA database, so if we can determine the
135+
* system's idea of which zone it is using and its behavior matches our zone
136+
* of the same name, we can skip the rather-expensive search through all the
137+
* zones in our database. This short-circuit path also ensures that we spell
138+
* the zone name the same way the system setting does, even in the presence
139+
* of multiple aliases for the same zone.
140+
*
133141
* Win32's native knowledge about timezones appears to be too incomplete
134-
* and too different from the Olson database for the above matching strategy
142+
* and too different from the IANA database for the above matching strategy
135143
* to be of any use. But there is just a limited number of timezones
136144
* available, so we can rely on a handmade mapping table instead.
137145
*/
@@ -150,6 +158,8 @@ struct tztry
150158
time_t test_times[MAX_TEST_TIMES];
151159
};
152160

161+
static bool check_system_link_file(const char *linkname, struct tztry *tt,
162+
char *bestzonename);
153163
static void scan_available_timezones(char *tzdir, char *tzdirsub,
154164
struct tztry *tt,
155165
int *bestscore, char *bestzonename);
@@ -299,12 +309,19 @@ score_timezone(const char *tzname, struct tztry *tt)
299309
return i;
300310
}
301311

312+
/*
313+
* Test whether given zone name is a perfect match to localtime() behavior
314+
*/
315+
static bool
316+
perfect_timezone_match(const char *tzname, struct tztry *tt)
317+
{
318+
return (score_timezone(tzname, tt) == tt->n_test_times);
319+
}
320+
302321

303322
/*
304323
* Try to identify a timezone name (in our terminology) that best matches the
305-
* observed behavior of the system timezone library. We cannot assume that
306-
* the system TZ environment setting (if indeed there is one) matches our
307-
* terminology, so we ignore it and just look at what localtime() returns.
324+
* observed behavior of the system localtime() function.
308325
*/
309326
static const char *
310327
identify_system_timezone(void)
@@ -339,7 +356,7 @@ identify_system_timezone(void)
339356
* way of doing things, but experience has shown that system-supplied
340357
* timezone definitions are likely to have DST behavior that is right for
341358
* the recent past and not so accurate further back. Scoring in this way
342-
* allows us to recognize zones that have some commonality with the Olson
359+
* allows us to recognize zones that have some commonality with the IANA
343360
* database, without insisting on exact match. (Note: we probe Thursdays,
344361
* not Sundays, to avoid triggering DST-transition bugs in localtime
345362
* itself.)
@@ -374,7 +391,18 @@ identify_system_timezone(void)
374391
tt.test_times[tt.n_test_times++] = t;
375392
}
376393

377-
/* Search for the best-matching timezone file */
394+
/*
395+
* Try to avoid the brute-force search by seeing if we can recognize the
396+
* system's timezone setting directly.
397+
*
398+
* Currently we just check /etc/localtime; there are other conventions for
399+
* this, but that seems to be the only one used on enough platforms to be
400+
* worth troubling over.
401+
*/
402+
if (check_system_link_file("/etc/localtime", &tt, resultbuf))
403+
return resultbuf;
404+
405+
/* No luck, so search for the best-matching timezone file */
378406
strlcpy(tmptzdir, pg_TZDIR(), sizeof(tmptzdir));
379407
bestscore = -1;
380408
resultbuf[0] = '\0';
@@ -383,7 +411,7 @@ identify_system_timezone(void)
383411
&bestscore, resultbuf);
384412
if (bestscore > 0)
385413
{
386-
/* Ignore Olson's rather silly "Factory" zone; use GMT instead */
414+
/* Ignore IANA's rather silly "Factory" zone; use GMT instead */
387415
if (strcmp(resultbuf, "Factory") == 0)
388416
return NULL;
389417
return resultbuf;
@@ -472,7 +500,7 @@ identify_system_timezone(void)
472500

473501
/*
474502
* Did not find the timezone. Fallback to use a GMT zone. Note that the
475-
* Olson timezone database names the GMT-offset zones in POSIX style: plus
503+
* IANA timezone database names the GMT-offset zones in POSIX style: plus
476504
* is west of Greenwich. It's unfortunate that this is opposite of SQL
477505
* conventions. Should we therefore change the names? Probably not...
478506
*/
@@ -486,6 +514,94 @@ identify_system_timezone(void)
486514
return resultbuf;
487515
}
488516

517+
/*
518+
* Examine a system-provided symlink file to see if it tells us the timezone.
519+
*
520+
* Unfortunately, there is little standardization of how the system default
521+
* timezone is determined in the absence of a TZ environment setting.
522+
* But a common strategy is to create a symlink at a well-known place.
523+
* If "linkname" identifies a readable symlink, and the tail of its contents
524+
* matches a zone name we know, and the actual behavior of localtime() agrees
525+
* with what we think that zone means, then we may use that zone name.
526+
*
527+
* We insist on a perfect behavioral match, which might not happen if the
528+
* system has a different IANA database version than we do; but in that case
529+
* it seems best to fall back to the brute-force search.
530+
*
531+
* linkname is the symlink file location to probe.
532+
*
533+
* tt tells about the system timezone behavior we need to match.
534+
*
535+
* If we successfully identify a zone name, store it in *bestzonename and
536+
* return true; else return false. bestzonename must be a buffer of length
537+
* TZ_STRLEN_MAX + 1.
538+
*/
539+
static bool
540+
check_system_link_file(const char *linkname, struct tztry *tt,
541+
char *bestzonename)
542+
{
543+
#ifdef HAVE_READLINK
544+
char link_target[MAXPGPATH];
545+
int len;
546+
const char *cur_name;
547+
548+
/*
549+
* Try to read the symlink. If not there, not a symlink, etc etc, just
550+
* quietly fail; the precise reason needn't concern us.
551+
*/
552+
len = readlink(linkname, link_target, sizeof(link_target));
553+
if (len < 0 || len >= sizeof(link_target))
554+
return false;
555+
link_target[len] = '\0';
556+
557+
#ifdef DEBUG_IDENTIFY_TIMEZONE
558+
fprintf(stderr, "symbolic link \"%s\" contains \"%s\"\n",
559+
linkname, link_target);
560+
#endif
561+
562+
/*
563+
* The symlink is probably of the form "/path/to/zones/zone/name", or
564+
* possibly it is a relative path. Nobody puts their zone DB directly in
565+
* the root directory, so we can definitely skip the first component; but
566+
* after that it's trial-and-error to identify which path component begins
567+
* the zone name.
568+
*/
569+
cur_name = link_target;
570+
while (*cur_name)
571+
{
572+
/* Advance to next segment of path */
573+
cur_name = strchr(cur_name + 1, '/');
574+
if (cur_name == NULL)
575+
break;
576+
/* If there are consecutive slashes, skip all, as the kernel would */
577+
do
578+
{
579+
cur_name++;
580+
} while (*cur_name == '/');
581+
582+
/*
583+
* Test remainder of path to see if it is a matching zone name.
584+
* Relative paths might contain ".."; we needn't bother testing if the
585+
* first component is that. Also defend against overlength names.
586+
*/
587+
if (*cur_name && *cur_name != '.' &&
588+
strlen(cur_name) <= TZ_STRLEN_MAX &&
589+
perfect_timezone_match(cur_name, tt))
590+
{
591+
/* Success! */
592+
strcpy(bestzonename, cur_name);
593+
return true;
594+
}
595+
}
596+
597+
/* Couldn't extract a matching zone name */
598+
return false;
599+
#else
600+
/* No symlinks? Forget it */
601+
return false;
602+
#endif
603+
}
604+
489605
/*
490606
* Recursively scan the timezone database looking for the best match to
491607
* the system timezone behavior.
@@ -586,7 +702,7 @@ static const struct
586702
* HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time
587703
* Zones on Windows 10 and Windows 7.
588704
*
589-
* The zones have been matched to Olson timezones by looking at the cities
705+
* The zones have been matched to IANA timezones by looking at the cities
590706
* listed in the win32 display name (in the comment here) in most cases.
591707
*/
592708
{

src/bin/initdb/initdb.c

+6-2
Original file line number | Diff line number | Diff line change
@@ -174,6 +174,7 @@ static char *pgdata_native;
174174
static int n_connections = 10;
175175
static int n_buffers = 50;
176176
static const char *dynamic_shared_memory_type = NULL;
177+
static const char *default_timezone = NULL;
177178

178179
/*
179180
* Warning messages for authentication methods
@@ -1058,6 +1059,11 @@ test_config_settings(void)
10581059
printf("%dMB\n", (n_buffers * (BLCKSZ / 1024)) / 1024);
10591060
else
10601061
printf("%dkB\n", n_buffers * (BLCKSZ / 1024));
1062+
1063+
printf(_("selecting default timezone ... "));
1064+
fflush(stdout);
1065+
default_timezone = select_default_timezone(share_path);
1066+
printf("%s\n", default_timezone ? default_timezone : "GMT");
10611067
}
10621068

10631069
/*
@@ -1086,7 +1092,6 @@ setup_config(void)
10861092
char **conflines;
10871093
char repltok[MAXPGPATH];
10881094
char path[MAXPGPATH];
1089-
const char *default_timezone;
10901095
char *autoconflines[3];
10911096

10921097
fputs(_("creating configuration files ... "), stdout);
@@ -1168,7 +1173,6 @@ setup_config(void)
11681173
"#default_text_search_config = 'pg_catalog.simple'",
11691174
repltok);
11701175

1171-
default_timezone = select_default_timezone(share_path);
11721176
if (default_timezone)
11731177
{
11741178
snprintf(repltok, sizeof(repltok), "timezone = '%s'",

src/interfaces/ecpg/pgtypeslib/dt_common.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -833,7 +833,7 @@ EncodeDateTime(struct tm *tm, fsec_t fsec, bool print_tz, int tz, const char *tz
833833
/*
834834
* Note: the uses of %.*s in this function would be risky if the
835835
* timezone names ever contain non-ASCII characters. However, all
836-
* TZ abbreviations in the Olson database are plain ASCII.
836+
* TZ abbreviations in the IANA database are plain ASCII.
837837
*/
838838

839839
if (print_tz)

0 commit comments

Comments
 (0)