Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1efd5ff

Browse files
committed
Add a pg_encoding_mbcliplen() function that is just like pg_mbcliplen()
except the caller can specify the encoding to work in; this will be needed for pg_stat_statements. In passing, do some marginal efficiency hacking and clean up some comments. Also, prevent the single-byte-encoding code path from fetching one byte past the stated length of the string (this last change fixes a bug, and the fix might need to be back-patched at some point).
1 parent 74ef810 commit 1efd5ff

File tree

2 files changed

+38
-23
lines changed

2 files changed

+38
-23
lines changed

src/backend/utils/mb/mbutils.c

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* (currently mule internal code (mic) is used)
55
* Tatsuo Ishii
66
*
7-
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.75 2008/11/11 03:01:20 tgl Exp $
7+
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.76 2009/01/04 18:37:35 tgl Exp $
88
*/
99
#include "postgres.h"
1010

@@ -710,14 +710,14 @@ pg_encoding_mb2wchar_with_len(int encoding,
710710
return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
711711
}
712712

713-
/* returns the byte length of a multibyte word */
713+
/* returns the byte length of a multibyte character */
714714
int
715715
pg_mblen(const char *mbstr)
716716
{
717717
return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
718718
}
719719

720-
/* returns the display length of a multibyte word */
720+
/* returns the display length of a multibyte character */
721721
int
722722
pg_dsplen(const char *mbstr)
723723
{
@@ -767,23 +767,37 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
767767

768768
/*
769769
* returns the byte length of a multibyte string
770-
* (not necessarily NULL terminated)
770+
* (not necessarily NUL-terminated)
771771
* that is no longer than limit.
772-
* this function does not break multibyte word boundary.
772+
* this function does not break multibyte character boundary.
773773
*/
774774
int
775775
pg_mbcliplen(const char *mbstr, int len, int limit)
776776
{
777+
return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
778+
len, limit);
779+
}
780+
781+
/*
782+
* pg_mbcliplen with specified encoding
783+
*/
784+
int
785+
pg_encoding_mbcliplen(int encoding, const char *mbstr,
786+
int len, int limit)
787+
{
788+
mblen_converter mblen_fn;
777789
int clen = 0;
778790
int l;
779791

780792
/* optimization for single byte encoding */
781-
if (pg_database_encoding_max_length() == 1)
793+
if (pg_encoding_max_length(encoding) == 1)
782794
return cliplen(mbstr, len, limit);
783795

796+
mblen_fn = pg_wchar_table[encoding].mblen;
797+
784798
while (len > 0 && *mbstr)
785799
{
786-
l = pg_mblen(mbstr);
800+
l = (*mblen_fn) ((const unsigned char *) mbstr);
787801
if ((clen + l) > limit)
788802
break;
789803
clen += l;
@@ -797,7 +811,8 @@ pg_mbcliplen(const char *mbstr, int len, int limit)
797811

798812
/*
799813
* Similar to pg_mbcliplen except the limit parameter specifies the
800-
* character length, not the byte length. */
814+
* character length, not the byte length.
815+
*/
801816
int
802817
pg_mbcharcliplen(const char *mbstr, int len, int limit)
803818
{
@@ -822,6 +837,18 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
822837
return clen;
823838
}
824839

840+
/* mbcliplen for any single-byte encoding */
841+
static int
842+
cliplen(const char *str, int len, int limit)
843+
{
844+
int l = 0;
845+
846+
len = Min(len, limit);
847+
while (l < len && str[l])
848+
l++;
849+
return l;
850+
}
851+
825852
void
826853
SetDatabaseEncoding(int encoding)
827854
{
@@ -884,17 +911,3 @@ pg_client_encoding(PG_FUNCTION_ARGS)
884911
Assert(ClientEncoding);
885912
return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
886913
}
887-
888-
static int
889-
cliplen(const char *str, int len, int limit)
890-
{
891-
int l = 0;
892-
const char *s;
893-
894-
for (s = str; *s; s++, l++)
895-
{
896-
if (l >= len || l >= limit)
897-
return l;
898-
}
899-
return (s - str);
900-
}

src/include/mb/pg_wchar.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.81 2009/01/01 17:23:59 momjian Exp $
9+
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.82 2009/01/04 18:37:36 tgl Exp $
1010
*
1111
* NOTES
1212
* This is used both by the backend and by libpq, but should not be
@@ -358,6 +358,8 @@ extern int pg_mic_mblen(const unsigned char *mbstr);
358358
extern int pg_mbstrlen(const char *mbstr);
359359
extern int pg_mbstrlen_with_len(const char *mbstr, int len);
360360
extern int pg_mbcliplen(const char *mbstr, int len, int limit);
361+
extern int pg_encoding_mbcliplen(int encoding, const char *mbstr,
362+
int len, int limit);
361363
extern int pg_mbcharcliplen(const char *mbstr, int len, int imit);
362364
extern int pg_encoding_max_length(int encoding);
363365
extern int pg_database_encoding_max_length(void);

0 commit comments

Comments
 (0)