Move wchar2char() and char2wchar() from tsearch into /mb to be easier to use for other modules; also move pnstrdup().

bmomjian · bmomjian · commit 9de09c087d63 · 2008-06-18T18:42:54.000Z

Clean up code slightly.
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.8 2008/06/17 16:09:06 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.9 2008/06/18 18:42:54 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -16,125 +16,8 @@
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_public.h"
 
-
 #ifdef USE_WIDE_UPPER_LOWER
 
-/*
- * wchar2char --- convert wide characters to multibyte format
- *
- * This has the same API as the standard wcstombs() function; in particular,
- * tolen is the maximum number of bytes to store at *to, and *from must be
- * zero-terminated.  The output will be zero-terminated iff there is room.
- */
-size_t
-wchar2char(char *to, const wchar_t *from, size_t tolen)
-{
-	if (tolen == 0)
-		return 0;
-
-#ifdef WIN32
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		int			r;
-
-		r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
-								NULL, NULL);
-
-		if (r <= 0)
-			return (size_t) -1;
-
-		Assert(r <= tolen);
-
-		/* Microsoft counts the zero terminator in the result */
-		return r - 1;
-	}
-#endif   /* WIN32 */
-
-	return wcstombs(to, from, tolen);
-}
-
-/*
- * char2wchar --- convert multibyte characters to wide characters
- *
- * This has almost the API of mbstowcs(), except that *from need not be
- * null-terminated; instead, the number of input bytes is specified as
- * fromlen.  Also, we ereport() rather than returning -1 for invalid
- * input encoding.	tolen is the maximum number of wchar_t's to store at *to.
- * The output will be zero-terminated iff there is room.
- */
-size_t
-char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
-{
-	if (tolen == 0)
-		return 0;
-
-#ifdef WIN32
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		int			r;
-
-		/* stupid Microsloth API does not work for zero-length input */
-		if (fromlen == 0)
-			r = 0;
-		else
-		{
-			r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
-
-			if (r <= 0)
-			{
-				/* see notes in oracle_compat.c about error reporting */
-				pg_verifymbstr(from, fromlen, false);
-				ereport(ERROR,
-						(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-						 errmsg("invalid multibyte character for locale"),
-						 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-			}
-		}
-
-		Assert(r < tolen);
-		to[r] = 0;
-
-		return r;
-	}
-#endif   /* WIN32 */
-
-	if (lc_ctype_is_c())
-	{
-		/*
-		 * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
-		 * allocated with sufficient space
-		 */
-		return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
-	}
-	else
-	{
-		/*
-		 * mbstowcs requires ending '\0'
-		 */
-		char	   *str = pnstrdup(from, fromlen);
-		size_t		result;
-
-		result = mbstowcs(to, str, tolen);
-
-		pfree(str);
-
-		if (result == (size_t) -1)
-		{
-			pg_verifymbstr(from, fromlen, false);
-			ereport(ERROR,
-					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-					 errmsg("invalid multibyte character for locale"),
-					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-		}
-
-		if (result < tolen)
-			to[result] = 0;
-
-		return result;
-	}
-}
-
-
 int
 t_isdigit(const char *ptr)
 {
diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 2008/06/18 18:42:54 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key)
 			bsearch(&key, s->stop, s->len,
 					sizeof(char *), comparestr)) ? true : false;
 }
-
-char *
-pnstrdup(const char *in, int len)
-{
-	char	   *out = palloc(len + 1);
-
-	memcpy(out, in, len);
-	out[len] = '\0';
-	return out;
-}
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
@@ -4,7 +4,7 @@
  * (currently mule internal code (mic) is used)
  * Tatsuo Ishii
  *
- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $
  */
 #include "postgres.h"
 
@@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
 	return result;
 }
 
+
+
+#ifdef USE_WIDE_UPPER_LOWER
+
+/*
+ * wchar2char --- convert wide characters to multibyte format
+ *
+ * This has the same API as the standard wcstombs() function; in particular,
+ * tolen is the maximum number of bytes to store at *to, and *from must be
+ * zero-terminated.  The output will be zero-terminated iff there is room.
+ */
+size_t
+wchar2char(char *to, const wchar_t *from, size_t tolen)
+{
+	size_t result;
+	
+	if (tolen == 0)
+		return 0;
+
+#ifdef WIN32
+	/*
+	 * On Windows, the "Unicode" locales assume UTF16, not UTF8, encoding,
+	 * and for some reason mbstowcs and wcstombs won't handle UTF8 for us,
+	 * so we use WideCharToMultiByte() and MultiByteToWideChar() instead.
+	 */
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
+								NULL, NULL);
+		/* A zero return is failure */
+		if (result <= 0)
+			result = -1;
+		else
+		{
+			Assert(result <= tolen);
+			/* Microsoft counts the zero terminator in the result */
+			result--;
+		}
+	}
+	else
+#endif   /* WIN32 */
+		result = wcstombs(to, from, tolen);
+	return result;
+}
+
+/*
+ * char2wchar --- convert multibyte characters to wide characters
+ *
+ * This has almost the API of mbstowcs(), except that *from need not be
+ * null-terminated; instead, the number of input bytes is specified as
+ * fromlen.  Also, we ereport() rather than returning -1 for invalid
+ * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
+ * The output will be zero-terminated iff there is room.
+ */
+size_t
+char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
+{
+	size_t		result;
+
+	if (tolen == 0)
+		return 0;
+
+#ifdef WIN32
+	/* See WIN32 "Unicode" comment above */
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/* Win32 API does not work for zero-length input */
+		if (fromlen == 0)
+			result = 0;
+		else
+		{
+			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
+			/* A zero return is failure */
+			if (result == 0)
+				result = -1;
+		}
+
+		if (result != -1)
+		{
+			Assert(result < tolen);
+			/* Append trailing null wchar (MultiByteToWideChar() does not) */
+			to[result] = 0;
+		}
+	}
+	else
+#endif   /* WIN32 */
+	{
+		if (lc_ctype_is_c())
+		{
+			/*
+			 * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
+			 * allocated with sufficient space
+			 */
+			result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
+		}
+		else
+		{
+			/* mbstowcs requires ending '\0' */
+			char	   *str = pnstrdup(from, fromlen);
+
+			result = mbstowcs(to, str, tolen);
+			pfree(str);
+		}
+	}
+
+	if (result == -1)
+	{
+		/*
+		 * Invalid multibyte character encountered.  We try to give a useful
+		 * error message by letting pg_verifymbstr check the string.  But it's
+		 * possible that the string is OK to us, and not OK to mbstowcs ---
+		 * this suggests that the LC_CTYPE locale is different from the
+		 * database encoding.  Give a generic error message if verifymbstr
+		 * can't find anything wrong.
+		 */
+		pg_verifymbstr(from, fromlen, false);	/* might not return */
+		/* but if it does ... */
+		ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				 errmsg("invalid multibyte character for locale"),
+				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+	}	
+
+	return result;
+}
+
+#endif
+
 /* convert a multibyte string to a wchar */
 int
 pg_mb2wchar(const char *from, pg_wchar *to)
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.63 2008/01/01 19:45:55 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.64 2008/06/18 18:42:54 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -624,6 +624,18 @@ repalloc(void *pointer, Size size)
 												 pointer, size);
 }
 
+/* Copy the first len bytes of 'in' into palloc'd memory and null-terminate */
+char *
+pnstrdup(const char *in, int len)
+{
+	char	   *out = palloc(len + 1);
+
+	memcpy(out, in, len);
+	out[len] = '\0';
+	return out;
+}
+
+
 /*
  * MemoryContextSwitchTo
  *		Returns the current context; installs the given context.
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.78 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
  *
  *	NOTES
  *		This is used both by the backend and by libpq, but should not be
@@ -362,6 +362,11 @@ extern int	pg_mbcharcliplen(const char *mbstr, int len, int imit);
 extern int	pg_encoding_max_length(int encoding);
 extern int	pg_database_encoding_max_length(void);
 
+#ifdef USE_WIDE_UPPER_LOWER
+extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
+extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
+#endif
+
 extern void SetDefaultClientEncoding(void);
 extern int	SetClientEncoding(int encoding, bool doit);
 extern void InitializeClientEncoding(void);
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1998-2008, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -33,9 +33,6 @@
 
 #ifdef USE_WIDE_UPPER_LOWER
 
-extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
-extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
-
 extern int	t_isdigit(const char *ptr);
 extern int	t_isspace(const char *ptr);
 extern int	t_isalpha(const char *ptr);
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1998-2008, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,8 +62,6 @@ typedef struct
 extern char *get_tsearch_config_filename(const char *basename,
 							const char *extension);
 
-extern char *pnstrdup(const char *in, int len);
-
 /*
  * Often useful stopword list management
  */
diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`*`
`8`	`8`	`*`
`9`	`9`	`* IDENTIFICATION`
`10`		`- * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $`
	`10`	`+ * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 2008/06/18 18:42:54 momjian Exp $`
`11`	`11`	`*`
`12`	`12`	`*-------------------------------------------------------------------------`
`13`	`13`	`*/`
`@@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key)`
`153`	`153`	`bsearch(&key, s->stop, s->len,`
`154`	`154`	`sizeof(char *), comparestr)) ? true : false;`
`155`	`155`	`}`
`156`		`-`
`157`		`-char *`
`158`		`-pnstrdup(const char *in, int len)`
`159`		`-{`
`160`		`- char *out = palloc(len + 1);`
`161`		`-`
`162`		`- memcpy(out, in, len);`
`163`		`- out[len] = '\0';`
`164`		`- return out;`
`165`		`-}`
Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@`
`5`	`5`	`*`
`6`	`6`	`* Copyright (c) 1998-2008, PostgreSQL Global Development Group`
`7`	`7`	`*`
`8`		`- * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $`
	`8`	`+ * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $`
`9`	`9`	`*`
`10`	`10`	`*-------------------------------------------------------------------------`
`11`	`11`	`*/`
`@@ -33,9 +33,6 @@`
`33`	`33`
`34`	`34`	`#ifdef USE_WIDE_UPPER_LOWER`
`35`	`35`
`36`		`-extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);`
`37`		`-extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);`
`38`		`-`
`39`	`36`	`extern int t_isdigit(const char *ptr);`
`40`	`37`	`extern int t_isspace(const char *ptr);`
`41`	`38`	`extern int t_isalpha(const char *ptr);`
Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@`
`6`	`6`	`*`
`7`	`7`	`* Copyright (c) 1998-2008, PostgreSQL Global Development Group`
`8`	`8`	`*`
`9`		`- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $`
	`9`	`+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $`
`10`	`10`	`*`
`11`	`11`	`*-------------------------------------------------------------------------`
`12`	`12`	`*/`
`@@ -62,8 +62,6 @@ typedef struct`
`62`	`62`	`extern char *get_tsearch_config_filename(const char *basename,`
`63`	`63`	`const char *extension);`
`64`	`64`
`65`		`-extern char *pnstrdup(const char *in, int len);`
`66`		`-`
`67`	`65`	`/*`
`68`	`66`	`* Often useful stopword list management`
`69`	`67`	`*/`