21
21
#include "mb/pg_wchar.h"
22
22
23
23
static const pg_case_map * find_case_map (pg_wchar ucs );
24
- static size_t convert_case (char * dst , size_t dstsize , const char * src ,
25
- ssize_t srclen , CaseKind casekind );
24
+ static size_t convert_case (char * dst , size_t dstsize , const char * src , ssize_t srclen ,
25
+ CaseKind str_casekind , WordBoundaryNext wbnext ,
26
+ void * wbstate );
26
27
27
28
pg_wchar
28
29
unicode_lowercase_simple (pg_wchar code )
@@ -67,7 +68,40 @@ unicode_uppercase_simple(pg_wchar code)
67
68
size_t
68
69
unicode_strlower (char * dst , size_t dstsize , const char * src , ssize_t srclen )
69
70
{
70
- return convert_case (dst , dstsize , src , srclen , CaseLower );
71
+ return convert_case (dst , dstsize , src , srclen , CaseLower , NULL , NULL );
72
+ }
73
+
74
+ /*
75
+ * unicode_strtitle()
76
+ *
77
+ * Convert src to titlecase, and return the result length (not including
78
+ * terminating NUL).
79
+ *
80
+ * String src must be encoded in UTF-8. If srclen < 0, src must be
81
+ * NUL-terminated.
82
+ *
83
+ * Result string is stored in dst, truncating if larger than dstsize. If
84
+ * dstsize is greater than the result length, dst will be NUL-terminated;
85
+ * otherwise not.
86
+ *
87
+ * If dstsize is zero, dst may be NULL. This is useful for calculating the
88
+ * required buffer size before allocating.
89
+ *
90
+ * Titlecasing requires knowledge about word boundaries, which is provided by
91
+ * the callback wbnext. A word boundary is the offset of the start of a word
92
+ * or the offset of the character immediately following a word.
93
+ *
94
+ * The caller is expected to initialize and free the callback state
95
+ * wbstate. The callback should first return offset 0 for the first boundary;
96
+ * then the offset of each subsequent word boundary; then the total length of
97
+ * the string to indicate the final boundary.
98
+ */
99
+ size_t
100
+ unicode_strtitle (char * dst , size_t dstsize , const char * src , ssize_t srclen ,
101
+ WordBoundaryNext wbnext , void * wbstate )
102
+ {
103
+ return convert_case (dst , dstsize , src , srclen , CaseTitle , wbnext ,
104
+ wbstate );
71
105
}
72
106
73
107
/*
@@ -89,30 +123,56 @@ unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen)
89
123
size_t
90
124
unicode_strupper (char * dst , size_t dstsize , const char * src , ssize_t srclen )
91
125
{
92
- return convert_case (dst , dstsize , src , srclen , CaseUpper );
126
+ return convert_case (dst , dstsize , src , srclen , CaseUpper , NULL , NULL );
93
127
}
94
128
95
129
/*
96
- * Implement Unicode Default Case Conversion algorithm.
130
+ * If str_casekind is CaseLower or CaseUpper, map each character in the string
131
+ * for which a mapping is available.
97
132
*
98
- * Map each character in the string for which a mapping is available.
133
+ * If str_casekind is CaseTitle, maps characters found on a word boundary to
134
+ * uppercase and other characters to lowercase.
99
135
*/
100
136
static size_t
101
137
convert_case (char * dst , size_t dstsize , const char * src , ssize_t srclen ,
102
- CaseKind casekind )
138
+ CaseKind str_casekind , WordBoundaryNext wbnext , void * wbstate )
103
139
{
140
+ /* character CaseKind varies while titlecasing */
141
+ CaseKind chr_casekind = str_casekind ;
104
142
size_t srcoff = 0 ;
105
143
size_t result_len = 0 ;
144
+ size_t boundary = 0 ;
145
+
146
+ Assert ((str_casekind == CaseTitle && wbnext && wbstate ) ||
147
+ (str_casekind != CaseTitle && !wbnext && !wbstate ));
148
+
149
+ if (str_casekind == CaseTitle )
150
+ {
151
+ boundary = wbnext (wbstate );
152
+ Assert (boundary == 0 ); /* start of text is always a boundary */
153
+ }
106
154
107
155
while ((srclen < 0 || srcoff < srclen ) && src [srcoff ] != '\0' )
108
156
{
109
157
pg_wchar u1 = utf8_to_unicode ((unsigned char * ) src + srcoff );
110
158
int u1len = unicode_utf8len (u1 );
111
159
const pg_case_map * casemap = find_case_map (u1 );
112
160
161
+ if (str_casekind == CaseTitle )
162
+ {
163
+ if (srcoff == boundary )
164
+ {
165
+ chr_casekind = CaseUpper ;
166
+ boundary = wbnext (wbstate );
167
+ }
168
+ else
169
+ chr_casekind = CaseLower ;
170
+ }
171
+
172
+ /* perform mapping, update result_len, and write to dst */
113
173
if (casemap )
114
174
{
115
- pg_wchar u2 = casemap -> simplemap [casekind ];
175
+ pg_wchar u2 = casemap -> simplemap [chr_casekind ];
116
176
pg_wchar u2len = unicode_utf8len (u2 );
117
177
118
178
if (result_len + u2len <= dstsize )
0 commit comments