Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 9de09c0

Browse files
committed
Move wchar2char() and char2wchar() from tsearch into src/backend/utils/mb so
that other modules can use them more easily; also move pnstrdup() into mcxt.c. Clean up code slightly.
1 parent 3eb9da5 commit 9de09c0

File tree

8 files changed

+155
-140
lines changed

8 files changed

+155
-140
lines changed

src/backend/tsearch/ts_locale.c

+1-118
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.8 2008/06/17 16:09:06 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.9 2008/06/18 18:42:54 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -16,125 +16,8 @@
1616
#include "tsearch/ts_locale.h"
1717
#include "tsearch/ts_public.h"
1818

19-
2019
#ifdef USE_WIDE_UPPER_LOWER
2120

22-
/*
23-
* wchar2char --- convert wide characters to multibyte format
24-
*
25-
* This has the same API as the standard wcstombs() function; in particular,
26-
* tolen is the maximum number of bytes to store at *to, and *from must be
27-
* zero-terminated. The output will be zero-terminated iff there is room.
28-
*/
29-
size_t
30-
wchar2char(char *to, const wchar_t *from, size_t tolen)
31-
{
32-
if (tolen == 0)
33-
return 0;
34-
35-
#ifdef WIN32
36-
if (GetDatabaseEncoding() == PG_UTF8)
37-
{
38-
int r;
39-
40-
r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
41-
NULL, NULL);
42-
43-
if (r <= 0)
44-
return (size_t) -1;
45-
46-
Assert(r <= tolen);
47-
48-
/* Microsoft counts the zero terminator in the result */
49-
return r - 1;
50-
}
51-
#endif /* WIN32 */
52-
53-
return wcstombs(to, from, tolen);
54-
}
55-
56-
/*
57-
* char2wchar --- convert multibyte characters to wide characters
58-
*
59-
* This has almost the API of mbstowcs(), except that *from need not be
60-
* null-terminated; instead, the number of input bytes is specified as
61-
* fromlen. Also, we ereport() rather than returning -1 for invalid
62-
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
63-
* The output will be zero-terminated iff there is room.
64-
*/
65-
size_t
66-
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
67-
{
68-
if (tolen == 0)
69-
return 0;
70-
71-
#ifdef WIN32
72-
if (GetDatabaseEncoding() == PG_UTF8)
73-
{
74-
int r;
75-
76-
/* stupid Microsloth API does not work for zero-length input */
77-
if (fromlen == 0)
78-
r = 0;
79-
else
80-
{
81-
r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
82-
83-
if (r <= 0)
84-
{
85-
/* see notes in oracle_compat.c about error reporting */
86-
pg_verifymbstr(from, fromlen, false);
87-
ereport(ERROR,
88-
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
89-
errmsg("invalid multibyte character for locale"),
90-
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
91-
}
92-
}
93-
94-
Assert(r < tolen);
95-
to[r] = 0;
96-
97-
return r;
98-
}
99-
#endif /* WIN32 */
100-
101-
if (lc_ctype_is_c())
102-
{
103-
/*
104-
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
105-
* allocated with sufficient space
106-
*/
107-
return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
108-
}
109-
else
110-
{
111-
/*
112-
* mbstowcs requires ending '\0'
113-
*/
114-
char *str = pnstrdup(from, fromlen);
115-
size_t result;
116-
117-
result = mbstowcs(to, str, tolen);
118-
119-
pfree(str);
120-
121-
if (result == (size_t) -1)
122-
{
123-
pg_verifymbstr(from, fromlen, false);
124-
ereport(ERROR,
125-
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
126-
errmsg("invalid multibyte character for locale"),
127-
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
128-
}
129-
130-
if (result < tolen)
131-
to[result] = 0;
132-
133-
return result;
134-
}
135-
}
136-
137-
13821
int
13922
t_isdigit(const char *ptr)
14023
{

src/backend/tsearch/ts_utils.c

+1-11
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 2008/06/18 18:42:54 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key)
153153
bsearch(&key, s->stop, s->len,
154154
sizeof(char *), comparestr)) ? true : false;
155155
}
156-
157-
char *
158-
pnstrdup(const char *in, int len)
159-
{
160-
char *out = palloc(len + 1);
161-
162-
memcpy(out, in, len);
163-
out[len] = '\0';
164-
return out;
165-
}

src/backend/utils/mb/mbutils.c

+129-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* (currently mule internal code (mic) is used)
55
* Tatsuo Ishii
66
*
7-
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $
7+
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $
88
*/
99
#include "postgres.h"
1010

@@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
555555
return result;
556556
}
557557

558+
559+
560+
#ifdef USE_WIDE_UPPER_LOWER
561+
562+
/*
563+
* wchar2char --- convert wide characters to multibyte format
564+
*
565+
* This has the same API as the standard wcstombs() function; in particular,
566+
* tolen is the maximum number of bytes to store at *to, and *from must be
567+
* zero-terminated. The output will be zero-terminated iff there is room.
568+
*/
569+
size_t
570+
wchar2char(char *to, const wchar_t *from, size_t tolen)
571+
{
572+
size_t result;
573+
574+
if (tolen == 0)
575+
return 0;
576+
577+
#ifdef WIN32
578+
/*
579+
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
580+
* and for some reason mbstowcs and wcstombs won't do this for us,
581+
* so we use WideCharToMultiByte() here and MultiByteToWideChar() in char2wchar().
582+
*/
583+
if (GetDatabaseEncoding() == PG_UTF8)
584+
{
585+
result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
586+
NULL, NULL);
587+
/* A zero return is failure */
588+
if (result <= 0)
589+
result = -1;
590+
else
591+
{
592+
Assert(result <= tolen);
593+
/* Microsoft counts the zero terminator in the result */
594+
result--;
595+
}
596+
}
597+
else
598+
#endif /* WIN32 */
599+
result = wcstombs(to, from, tolen);
600+
return result;
601+
}
602+
603+
/*
604+
* char2wchar --- convert multibyte characters to wide characters
605+
*
606+
* This has almost the API of mbstowcs(), except that *from need not be
607+
* null-terminated; instead, the number of input bytes is specified as
608+
* fromlen. Also, we ereport() rather than returning -1 for invalid
609+
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
610+
* The output will be zero-terminated iff there is room.
611+
*/
612+
size_t
613+
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
614+
{
615+
size_t result;
616+
617+
if (tolen == 0)
618+
return 0;
619+
620+
#ifdef WIN32
621+
/* See WIN32 "Unicode" comment above */
622+
if (GetDatabaseEncoding() == PG_UTF8)
623+
{
624+
/* Win32 API does not work for zero-length input */
625+
if (fromlen == 0)
626+
result = 0;
627+
else
628+
{
629+
result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
630+
/* A zero return is failure */
631+
if (result == 0)
632+
result = -1;
633+
}
634+
635+
if (result != -1)
636+
{
637+
Assert(result < tolen);
638+
/* Append trailing null wchar (MultiByteToWideChar() does not) */
639+
to[result] = 0;
640+
}
641+
}
642+
else
643+
#endif /* WIN32 */
644+
{
645+
if (lc_ctype_is_c())
646+
{
647+
/*
648+
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
649+
* allocated with sufficient space
650+
*/
651+
result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
652+
}
653+
else
654+
{
655+
/* mbstowcs requires ending '\0' */
656+
char *str = pnstrdup(from, fromlen);
657+
658+
result = mbstowcs(to, str, tolen);
659+
pfree(str);
660+
}
661+
}
662+
663+
if (result == -1)
664+
{
665+
/*
666+
* Invalid multibyte character encountered. We try to give a useful
667+
* error message by letting pg_verifymbstr check the string. But it's
668+
* possible that the string is OK to us, and not OK to mbstowcs ---
669+
* this suggests that the LC_CTYPE locale is different from the
670+
* database encoding. Give a generic error message if verifymbstr
671+
* can't find anything wrong.
672+
*/
673+
pg_verifymbstr(from, fromlen, false); /* might not return */
674+
/* but if it does ... */
675+
ereport(ERROR,
676+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
677+
errmsg("invalid multibyte character for locale"),
678+
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
679+
}
680+
681+
return result;
682+
}
683+
684+
#endif
685+
558686
/* convert a multibyte string to a wchar */
559687
int
560688
pg_mb2wchar(const char *from, pg_wchar *to)

src/backend/utils/mmgr/mcxt.c

+13-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.63 2008/01/01 19:45:55 momjian Exp $
17+
* $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.64 2008/06/18 18:42:54 momjian Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -624,6 +624,18 @@ repalloc(void *pointer, Size size)
624624
pointer, size);
625625
}
626626

627+
/* Like pstrdup(), but copy exactly len bytes of the input, then append a null byte */
628+
char *
629+
pnstrdup(const char *in, int len)
630+
{
631+
char *out = palloc(len + 1);
632+
633+
memcpy(out, in, len);
634+
out[len] = '\0';
635+
return out;
636+
}
637+
638+
627639
/*
628640
* MemoryContextSwitchTo
629641
* Returns the current context; installs the given context.

src/include/mb/pg_wchar.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.78 2008/01/01 19:45:58 momjian Exp $
9+
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
1010
*
1111
* NOTES
1212
* This is used both by the backend and by libpq, but should not be
@@ -362,6 +362,11 @@ extern int pg_mbcharcliplen(const char *mbstr, int len, int imit);
362362
extern int pg_encoding_max_length(int encoding);
363363
extern int pg_database_encoding_max_length(void);
364364

365+
#ifdef USE_WIDE_UPPER_LOWER
366+
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
367+
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
368+
#endif
369+
365370
extern void SetDefaultClientEncoding(void);
366371
extern int SetClientEncoding(int encoding, bool doit);
367372
extern void InitializeClientEncoding(void);

src/include/tsearch/ts_locale.h

+1-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
77
*
8-
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $
8+
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $
99
*
1010
*-------------------------------------------------------------------------
1111
*/
@@ -33,9 +33,6 @@
3333

3434
#ifdef USE_WIDE_UPPER_LOWER
3535

36-
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
37-
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
38-
3936
extern int t_isdigit(const char *ptr);
4037
extern int t_isspace(const char *ptr);
4138
extern int t_isalpha(const char *ptr);

src/include/tsearch/ts_public.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*
77
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
88
*
9-
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
9+
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -62,8 +62,6 @@ typedef struct
6262
extern char *get_tsearch_config_filename(const char *basename,
6363
const char *extension);
6464

65-
extern char *pnstrdup(const char *in, int len);
66-
6765
/*
6866
* Often useful stopword list management
6967
*/

0 commit comments

Comments
 (0)