Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit aa17c06

Browse files
committed
Add function to import operating system collations
Move this logic out of initdb into a user-callable function. This simplifies the code and makes it possible to update the standard collations later on if additional operating system collations appear. Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Euler Taveira <euler@timbira.com.br>
1 parent 193a7d7 commit aa17c06

File tree

8 files changed

+229
-172
lines changed

8 files changed

+229
-172
lines changed

doc/src/sgml/charset.sgml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
496496
</para>
497497
</sect2>
498498

499-
<sect2>
499+
<sect2 id="collation-managing">
500500
<title>Managing Collations</title>
501501

502502
<para>

doc/src/sgml/func.sgml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19190,6 +19190,46 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
1919019190
in the database's default tablespace, the tablespace can be specified as 0.
1919119191
</para>
1919219192

19193+
<para>
19194+
<xref linkend="functions-admin-collation"> lists functions used to manage
19195+
collations.
19196+
</para>
19197+
19198+
<table id="functions-admin-collation">
19199+
<title>Collation Management Functions</title>
19200+
<tgroup cols="3">
19201+
<thead>
19202+
<row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row>
19203+
</thead>
19204+
19205+
<tbody>
19206+
<row>
19207+
<entry>
19208+
<indexterm><primary>pg_import_system_collations</primary></indexterm>
19209+
<literal><function>pg_import_system_collations(<parameter>if_not_exists</> <type>boolean</>, <parameter>schema</> <type>regnamespace</>)</function></literal>
19210+
</entry>
19211+
<entry><type>void</type></entry>
19212+
<entry>Import operating system collations</entry>
19213+
</row>
19214+
</tbody>
19215+
</tgroup>
19216+
</table>
19217+
19218+
<para>
19219+
<function>pg_import_system_collations</> populates the system
19220+
catalog <literal>pg_collation</literal> with collations based on all the
19221+
locales it finds on the operating system. This is
19222+
what <command>initdb</command> uses;
19223+
see <xref linkend="collation-managing"> for more details. If additional
19224+
locales are installed into the operating system later on, this function
19225+
can be run again to add collations for the new locales. In that case, the
19226+
parameter <parameter>if_not_exists</parameter> should be set to true to
19227+
skip over existing collations. The <parameter>schema</parameter>
19228+
parameter would typically be <literal>pg_catalog</literal>, but that is
19229+
not a requirement. (Collation objects based on locales that are no longer
19230+
present on the operating system are never removed by this function.)
19231+
</para>
19232+
1919319233
</sect2>
1919419234

1919519235
<sect2 id="functions-admin-index">

src/backend/catalog/pg_collation.c

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ Oid
4141
CollationCreate(const char *collname, Oid collnamespace,
4242
Oid collowner,
4343
int32 collencoding,
44-
const char *collcollate, const char *collctype)
44+
const char *collcollate, const char *collctype,
45+
bool if_not_exists)
4546
{
4647
Relation rel;
4748
TupleDesc tupDesc;
@@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
7273
PointerGetDatum(collname),
7374
Int32GetDatum(collencoding),
7475
ObjectIdGetDatum(collnamespace)))
75-
ereport(ERROR,
76+
{
77+
if (if_not_exists)
78+
{
79+
ereport(NOTICE,
7680
(errcode(ERRCODE_DUPLICATE_OBJECT),
77-
errmsg("collation \"%s\" for encoding \"%s\" already exists",
81+
errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
7882
collname, pg_encoding_to_char(collencoding))));
83+
return InvalidOid;
84+
}
85+
else
86+
ereport(ERROR,
87+
(errcode(ERRCODE_DUPLICATE_OBJECT),
88+
errmsg("collation \"%s\" for encoding \"%s\" already exists",
89+
collname, pg_encoding_to_char(collencoding))));
90+
}
7991

8092
/*
8193
* Also forbid matching an any-encoding entry. This test of course is not
@@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace,
8698
PointerGetDatum(collname),
8799
Int32GetDatum(-1),
88100
ObjectIdGetDatum(collnamespace)))
89-
ereport(ERROR,
101+
{
102+
if (if_not_exists)
103+
{
104+
ereport(NOTICE,
105+
(errcode(ERRCODE_DUPLICATE_OBJECT),
106+
errmsg("collation \"%s\" already exists, skipping",
107+
collname)));
108+
return InvalidOid;
109+
}
110+
else
111+
ereport(ERROR,
90112
(errcode(ERRCODE_DUPLICATE_OBJECT),
91113
errmsg("collation \"%s\" already exists",
92114
collname)));
115+
}
93116

94117
/* open pg_collation */
95118
rel = heap_open(CollationRelationId, RowExclusiveLock);

src/backend/commands/collationcmds.c

Lines changed: 153 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
136136
GetUserId(),
137137
GetDatabaseEncoding(),
138138
collcollate,
139-
collctype);
139+
collctype,
140+
false);
141+
142+
if (!OidIsValid(newoid))
143+
return InvalidObjectAddress;
140144

141145
ObjectAddressSet(address, CollationRelationId, newoid);
142146

@@ -177,3 +181,151 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
177181
errmsg("collation \"%s\" already exists in schema \"%s\"",
178182
collname, get_namespace_name(nspOid))));
179183
}
184+
185+
186+
/*
187+
* "Normalize" a locale name, stripping off encoding tags such as
188+
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
189+
* -> "br_FR@euro"). Return true if a new, different name was
190+
* generated.
191+
*/
192+
pg_attribute_unused()
193+
static bool
194+
normalize_locale_name(char *new, const char *old)
195+
{
196+
char *n = new;
197+
const char *o = old;
198+
bool changed = false;
199+
200+
while (*o)
201+
{
202+
if (*o == '.')
203+
{
204+
/* skip over encoding tag such as ".utf8" or ".UTF-8" */
205+
o++;
206+
while ((*o >= 'A' && *o <= 'Z')
207+
|| (*o >= 'a' && *o <= 'z')
208+
|| (*o >= '0' && *o <= '9')
209+
|| (*o == '-'))
210+
o++;
211+
changed = true;
212+
}
213+
else
214+
*n++ = *o++;
215+
}
216+
*n = '\0';
217+
218+
return changed;
219+
}
220+
221+
222+
Datum
223+
pg_import_system_collations(PG_FUNCTION_ARGS)
224+
{
225+
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
226+
bool if_not_exists = PG_GETARG_BOOL(0);
227+
Oid nspid = PG_GETARG_OID(1);
228+
229+
FILE *locale_a_handle;
230+
char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
231+
int count = 0;
232+
#endif
233+
234+
if (!superuser())
235+
ereport(ERROR,
236+
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
237+
(errmsg("must be superuser to import system collations"))));
238+
239+
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
240+
locale_a_handle = OpenPipeStream("locale -a", "r");
241+
if (locale_a_handle == NULL)
242+
ereport(ERROR,
243+
(errcode_for_file_access(),
244+
errmsg("could not execute command \"%s\": %m",
245+
"locale -a")));
246+
247+
while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
248+
{
249+
int i;
250+
size_t len;
251+
int enc;
252+
bool skip;
253+
char alias[NAMEDATALEN];
254+
255+
len = strlen(localebuf);
256+
257+
if (len == 0 || localebuf[len - 1] != '\n')
258+
{
259+
elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
260+
continue;
261+
}
262+
localebuf[len - 1] = '\0';
263+
264+
/*
265+
* Some systems have locale names that don't consist entirely of ASCII
266+
* letters (such as "bokm&aring;l" or "fran&ccedil;ais"). This is
267+
* pretty silly, since we need the locale itself to interpret the
268+
* non-ASCII characters. We can't do much with those, so we filter
269+
* them out.
270+
*/
271+
skip = false;
272+
for (i = 0; i < len; i++)
273+
{
274+
if (IS_HIGHBIT_SET(localebuf[i]))
275+
{
276+
skip = true;
277+
break;
278+
}
279+
}
280+
if (skip)
281+
{
282+
elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
283+
continue;
284+
}
285+
286+
enc = pg_get_encoding_from_locale(localebuf, false);
287+
if (enc < 0)
288+
{
289+
/* error message printed by pg_get_encoding_from_locale() */
290+
continue;
291+
}
292+
if (!PG_VALID_BE_ENCODING(enc))
293+
continue; /* ignore locales for client-only encodings */
294+
if (enc == PG_SQL_ASCII)
295+
continue; /* C/POSIX are already in the catalog */
296+
297+
count++;
298+
299+
CollationCreate(localebuf, nspid, GetUserId(), enc,
300+
localebuf, localebuf, if_not_exists);
301+
302+
CommandCounterIncrement();
303+
304+
/*
305+
* Generate aliases such as "en_US" in addition to "en_US.utf8" for
306+
* ease of use. Note that collation names are unique per encoding
307+
* only, so this doesn't clash with "en_US" for LATIN1, say.
308+
*
309+
* This always runs in "if not exists" mode, to skip aliases that
310+
* conflict with an existing locale name for the same encoding. For
311+
* example, "br_FR.iso88591" is normalized to "br_FR", both for
312+
* encoding LATIN1. But the unnormalized locale "br_FR" already
313+
* exists for LATIN1.
314+
*/
315+
if (normalize_locale_name(alias, localebuf))
316+
{
317+
CollationCreate(alias, nspid, GetUserId(), enc,
318+
localebuf, localebuf, true);
319+
CommandCounterIncrement();
320+
}
321+
}
322+
323+
ClosePipeStream(locale_a_handle);
324+
325+
if (count == 0)
326+
ereport(ERROR,
327+
(errmsg("no usable system locales were found")));
328+
#endif /* not HAVE_LOCALE_T && not WIN32 */
329+
330+
PG_RETURN_VOID();
331+
}

0 commit comments

Comments
 (0)