Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 83b6131

Browse files
committed
Improve behavior of tsearch_readline(), and remove t_readline().
Commit fbeb9da, which added the tsearch_readline APIs, left t_readline() in place as a compatibility measure. But that function has been unused and deprecated for twelve years now, so that seems like enough time to remove it. Doing so, and merging t_readline's code into tsearch_readline, aids in making several useful improvements: * The hard-wired 4K limit on line length in tsearch data files is removed, by using a StringInfo buffer instead of a fixed-size buffer. * We can buy back the per-line palloc/pfree added by 3ea7e95 in the common case where encoding conversion is not required. * We no longer need a separate pg_verify_mbstr call, as that functionality was folded into encoding conversion some time ago. (We could have done some of this stuff while keeping t_readline as a separate API, but there seems little point, since there's no reason for anyone to still be using t_readline directly.) Discussion: https://postgr.es/m/48A4FA71-524E-41B9-953A-FD04EF36E2E7@yesql.se
1 parent aca7484 commit 83b6131

File tree

3 files changed

+32
-58
lines changed

3 files changed

+32
-58
lines changed

src/backend/tsearch/dict_thesaurus.c

-5
Original file line numberDiff line numberDiff line change
@@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
286286
(errcode(ERRCODE_CONFIG_FILE_ERROR),
287287
errmsg("unexpected end of line")));
288288

289-
/*
290-
* Note: currently, tsearch_readline can't return lines exceeding 4KB,
291-
* so overflow of the word counts is impossible. But that may not
292-
* always be true, so let's check.
293-
*/
294289
if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
295290
ereport(ERROR,
296291
(errcode(ERRCODE_CONFIG_FILE_ERROR),

src/backend/tsearch/ts_locale.c

+28-50
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "postgres.h"
1515

1616
#include "catalog/pg_collation.h"
17+
#include "common/string.h"
1718
#include "storage/fd.h"
1819
#include "tsearch/ts_locale.h"
1920
#include "tsearch/ts_public.h"
@@ -128,6 +129,7 @@ tsearch_readline_begin(tsearch_readline_state *stp,
128129
return false;
129130
stp->filename = filename;
130131
stp->lineno = 0;
132+
initStringInfo(&stp->buf);
131133
stp->curline = NULL;
132134
/* Setup error traceback support for ereport() */
133135
stp->cb.callback = tsearch_readline_callback;
@@ -145,31 +147,43 @@ tsearch_readline_begin(tsearch_readline_state *stp,
145147
char *
146148
tsearch_readline(tsearch_readline_state *stp)
147149
{
148-
char *result;
150+
char *recoded;
149151

150152
/* Advance line number to use in error reports */
151153
stp->lineno++;
152154

153155
/* Clear curline, it's no longer relevant */
154156
if (stp->curline)
155157
{
156-
pfree(stp->curline);
158+
if (stp->curline != stp->buf.data)
159+
pfree(stp->curline);
157160
stp->curline = NULL;
158161
}
159162

160163
/* Collect next line, if there is one */
161-
result = t_readline(stp->fp);
162-
if (!result)
164+
if (!pg_get_line_buf(stp->fp, &stp->buf))
163165
return NULL;
164166

167+
/* Validate the input as UTF-8, then convert to DB encoding if needed */
168+
recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
169+
170+
/* Save the correctly-encoded string for possible error reports */
171+
stp->curline = recoded; /* might be equal to buf.data */
172+
165173
/*
166-
* Save a copy of the line for possible use in error reports. (We cannot
167-
* just save "result", since it's likely to get pfree'd at some point by
168-
* the caller; an error after that would try to access freed data.)
174+
* We always return a freshly pstrdup'd string. This is clearly necessary
175+
* if pg_any_to_server() returned buf.data, and we need a second copy even
176+
* if encoding conversion did occur. The caller is entitled to pfree the
177+
* returned string at any time, which would leave curline pointing to
178+
* recycled storage, causing problems if an error occurs after that point.
179+
* (It's preferable to return the result of pstrdup instead of the output
180+
* of pg_any_to_server, because the conversion result tends to be
181+
* over-allocated. Since callers might save the result string directly
182+
* into a long-lived dictionary structure, we don't want it to be a larger
183+
* palloc chunk than necessary. We'll reclaim the conversion result on
184+
* the next call.)
169185
*/
170-
stp->curline = pstrdup(result);
171-
172-
return result;
186+
return pstrdup(recoded);
173187
}
174188

175189
/*
@@ -181,11 +195,13 @@ tsearch_readline_end(tsearch_readline_state *stp)
181195
/* Suppress use of curline in any error reported below */
182196
if (stp->curline)
183197
{
184-
pfree(stp->curline);
198+
if (stp->curline != stp->buf.data)
199+
pfree(stp->curline);
185200
stp->curline = NULL;
186201
}
187202

188203
/* Release other resources */
204+
pfree(stp->buf.data);
189205
FreeFile(stp->fp);
190206

191207
/* Pop the error context stack */
@@ -203,8 +219,7 @@ tsearch_readline_callback(void *arg)
203219

204220
/*
205221
* We can't include the text of the config line for errors that occur
206-
* during t_readline() itself. This is only partly a consequence of our
207-
* arms-length use of that routine: the major cause of such errors is
222+
* during tsearch_readline() itself. The major cause of such errors is
208223
* encoding violations, and we daren't try to print error messages
209224
* containing badly-encoded data.
210225
*/
@@ -220,43 +235,6 @@ tsearch_readline_callback(void *arg)
220235
}
221236

222237

223-
/*
224-
* Read the next line from a tsearch data file (expected to be in UTF-8), and
225-
* convert it to database encoding if needed. The returned string is palloc'd.
226-
* NULL return means EOF.
227-
*
228-
* Note: direct use of this function is now deprecated. Go through
229-
* tsearch_readline() to provide better error reporting.
230-
*/
231-
char *
232-
t_readline(FILE *fp)
233-
{
234-
int len;
235-
char *recoded;
236-
char buf[4096]; /* lines must not be longer than this */
237-
238-
if (fgets(buf, sizeof(buf), fp) == NULL)
239-
return NULL;
240-
241-
len = strlen(buf);
242-
243-
/* Make sure the input is valid UTF-8 */
244-
(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
245-
246-
/* And convert */
247-
recoded = pg_any_to_server(buf, len, PG_UTF8);
248-
if (recoded == buf)
249-
{
250-
/*
251-
* conversion didn't pstrdup, so we must. We can use the length of the
252-
* original string, because no conversion was done.
253-
*/
254-
recoded = pnstrdup(recoded, len);
255-
}
256-
257-
return recoded;
258-
}
259-
260238
/*
261239
* lowerstr --- fold null-terminated string to lower case
262240
*

src/include/tsearch/ts_locale.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <ctype.h>
1616
#include <limits.h>
1717

18+
#include "lib/stringinfo.h"
1819
#include "mb/pg_wchar.h"
1920
#include "utils/pg_locale.h"
2021

@@ -33,7 +34,9 @@ typedef struct
3334
FILE *fp;
3435
const char *filename;
3536
int lineno;
36-
char *curline;
37+
StringInfoData buf; /* current input line, in UTF-8 */
38+
char *curline; /* current input line, in DB's encoding */
39+
/* curline may be NULL, or equal to buf.data, or a palloc'd string */
3740
ErrorContextCallback cb;
3841
} tsearch_readline_state;
3942

@@ -57,6 +60,4 @@ extern bool tsearch_readline_begin(tsearch_readline_state *stp,
5760
extern char *tsearch_readline(tsearch_readline_state *stp);
5861
extern void tsearch_readline_end(tsearch_readline_state *stp);
5962

60-
extern char *t_readline(FILE *fp);
61-
6263
#endif /* __TSLOCALE_H__ */

0 commit comments

Comments
 (0)