Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c10f830

Browse files
committed
Make canonicalize_path() more canonical.
Teach canonicalize_path() how to strip all unnecessary uses of "." and "..", replacing the previous ad-hoc code that got rid of only some such cases. In particular, we can always remove all such uses from absolute paths. The proximate reason to do this is that Windows rejects paths involving ".." in some cases (in particular, you can't put one in a symlink), so we ought to be sure we don't use ".." unnecessarily. Moreover, it seems like good cleanup on general principles. There is other path-munging code that could be simplified now, but we'll leave that for followup work. It is tempting to call this a bug fix and back-patch it. On the other hand, the misbehavior can only be reached if a highly privileged user does something dubious, so it's not unreasonable to say "so don't do that". And this patch could result in unexpected behavioral changes, in case anybody was expecting uses of ".." to stay put. So at least for now, just put it in HEAD. Shenhao Wang, editorialized a bit by me Discussion: https://postgr.es/m/OSBPR01MB4214FA221FFE046F11F2AD74F2D49@OSBPR01MB4214.jpnprd01.prod.outlook.com
1 parent c89f409 commit c10f830

File tree

7 files changed

+387
-54
lines changed

7 files changed

+387
-54
lines changed

contrib/adminpack/expected/adminpack.out

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ SELECT pg_file_write(current_setting('data_directory') || '/test_file4', 'test4'
5151
(1 row)
5252

5353
SELECT pg_file_write(current_setting('data_directory') || '/../test_file4', 'test4', false);
54-
ERROR: reference to parent directory ("..") not allowed
54+
ERROR: absolute path not allowed
5555
RESET ROLE;
5656
REVOKE EXECUTE ON FUNCTION pg_file_write(text,text,bool) FROM regress_user1;
5757
REVOKE pg_read_all_settings FROM regress_user1;

src/port/path.c

+205-53
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@
4646

4747
static void make_relative_path(char *ret_path, const char *target_path,
4848
const char *bin_path, const char *my_exec_path);
49-
static void trim_directory(char *path);
49+
static char *trim_directory(char *path);
5050
static void trim_trailing_separator(char *path);
51+
static char *append_subdir_to_path(char *path, char *subdir);
5152

5253

5354
/*
@@ -222,13 +223,9 @@ join_path_components(char *ret_path,
222223
strlcpy(ret_path, head, MAXPGPATH);
223224

224225
/*
225-
* Remove any leading "." in the tail component.
226-
*
227-
* Note: we used to try to remove ".." as well, but that's tricky to get
228-
* right; now we just leave it to be done by canonicalize_path() later.
226+
* We used to try to simplify some cases involving "." and "..", but now
227+
* we just leave that to be done by canonicalize_path() later.
229228
*/
230-
while (tail[0] == '.' && IS_DIR_SEP(tail[1]))
231-
tail += 2;
232229

233230
if (*tail)
234231
{
@@ -241,23 +238,39 @@ join_path_components(char *ret_path,
241238
}
242239

243240

241+
/* State-machine states for canonicalize_path */
242+
typedef enum
243+
{
244+
ABSOLUTE_PATH_INIT, /* Just past the leading '/' (and Windows
245+
* drive name if any) of an absolute path */
246+
ABSOLUTE_WITH_N_DEPTH, /* We collected 'pathdepth' directories in an
247+
* absolute path */
248+
RELATIVE_PATH_INIT, /* At start of a relative path */
249+
RELATIVE_WITH_N_DEPTH, /* We collected 'pathdepth' directories in a
250+
* relative path */
251+
RELATIVE_WITH_PARENT_REF /* Relative path containing only double-dots */
252+
} canonicalize_state;
253+
244254
/*
245255
* Clean up path by:
246256
* o make Win32 path use Unix slashes
247257
* o remove trailing quote on Win32
248258
* o remove trailing slash
249-
* o remove duplicate adjacent separators
250-
* o remove trailing '.'
251-
* o process trailing '..' ourselves
259+
* o remove duplicate (adjacent) separators
260+
* o remove '.' (unless path reduces to only '.')
261+
* o process '..' ourselves, removing it if possible
252262
*/
253263
void
254264
canonicalize_path(char *path)
255265
{
256266
char *p,
257267
*to_p;
258268
char *spath;
269+
char *parsed;
270+
char *unparse;
259271
bool was_sep = false;
260-
int pending_strips;
272+
canonicalize_state state;
273+
int pathdepth = 0; /* counts collected regular directory names */
261274

262275
#ifdef WIN32
263276

@@ -308,60 +321,173 @@ canonicalize_path(char *path)
308321
*to_p = '\0';
309322

310323
/*
311-
* Remove any trailing uses of "." and process ".." ourselves
324+
* Remove any uses of "." and process ".." ourselves
312325
*
313326
* Note that "/../.." should reduce to just "/", while "../.." has to be
314-
* kept as-is. In the latter case we put back mistakenly trimmed ".."
315-
* components below. Also note that we want a Windows drive spec to be
316-
* visible to trim_directory(), but it's not part of the logic that's
317-
* looking at the name components; hence distinction between path and
318-
* spath.
327+
* kept as-is. Also note that we want a Windows drive spec to be visible
328+
* to trim_directory(), but it's not part of the logic that's looking at
329+
* the name components; hence distinction between path and spath.
330+
*
331+
* This loop overwrites the path in-place. This is safe since we'll never
332+
* make the path longer. "unparse" points to where we are reading the
333+
* path, "parse" to where we are writing.
319334
*/
320335
spath = skip_drive(path);
321-
pending_strips = 0;
322-
for (;;)
336+
if (*spath == '\0')
337+
return; /* empty path is returned as-is */
338+
339+
if (*spath == '/')
323340
{
324-
int len = strlen(spath);
341+
state = ABSOLUTE_PATH_INIT;
342+
/* Skip the leading slash for absolute path */
343+
parsed = unparse = (spath + 1);
344+
}
345+
else
346+
{
347+
state = RELATIVE_PATH_INIT;
348+
parsed = unparse = spath;
349+
}
325350

326-
if (len >= 2 && strcmp(spath + len - 2, "/.") == 0)
327-
trim_directory(path);
328-
else if (strcmp(spath, ".") == 0)
351+
while (*unparse != '\0')
352+
{
353+
char *unparse_next;
354+
bool is_double_dot;
355+
356+
/* Split off this dir name, and set unparse_next to the next one */
357+
unparse_next = unparse;
358+
while (*unparse_next && *unparse_next != '/')
359+
unparse_next++;
360+
if (*unparse_next != '\0')
361+
*unparse_next++ = '\0';
362+
363+
/* Identify type of this dir name */
364+
if (strcmp(unparse, ".") == 0)
329365
{
330-
/* Want to leave "." alone, but "./.." has to become ".." */
331-
if (pending_strips > 0)
332-
*spath = '\0';
333-
break;
366+
/* We can ignore "." components in all cases */
367+
unparse = unparse_next;
368+
continue;
334369
}
335-
else if ((len >= 3 && strcmp(spath + len - 3, "/..") == 0) ||
336-
strcmp(spath, "..") == 0)
370+
371+
if (strcmp(unparse, "..") == 0)
372+
is_double_dot = true;
373+
else
337374
{
338-
trim_directory(path);
339-
pending_strips++;
375+
/* adjacent separators were eliminated above */
376+
Assert(*unparse != '\0');
377+
is_double_dot = false;
340378
}
341-
else if (pending_strips > 0 && *spath != '\0')
379+
380+
switch (state)
342381
{
343-
/* trim a regular directory name canceled by ".." */
344-
trim_directory(path);
345-
pending_strips--;
346-
/* foo/.. should become ".", not empty */
347-
if (*spath == '\0')
348-
strcpy(spath, ".");
382+
case ABSOLUTE_PATH_INIT:
383+
/* We can ignore ".." immediately after / */
384+
if (!is_double_dot)
385+
{
386+
/* Append first dir name (we already have leading slash) */
387+
parsed = append_subdir_to_path(parsed, unparse);
388+
state = ABSOLUTE_WITH_N_DEPTH;
389+
pathdepth++;
390+
}
391+
break;
392+
case ABSOLUTE_WITH_N_DEPTH:
393+
if (is_double_dot)
394+
{
395+
/* Remove last parsed dir */
396+
/* (trim_directory won't remove the leading slash) */
397+
*parsed = '\0';
398+
parsed = trim_directory(path);
399+
if (--pathdepth == 0)
400+
state = ABSOLUTE_PATH_INIT;
401+
}
402+
else
403+
{
404+
/* Append normal dir */
405+
*parsed++ = '/';
406+
parsed = append_subdir_to_path(parsed, unparse);
407+
pathdepth++;
408+
}
409+
break;
410+
case RELATIVE_PATH_INIT:
411+
if (is_double_dot)
412+
{
413+
/* Append irreducible double-dot (..) */
414+
parsed = append_subdir_to_path(parsed, unparse);
415+
state = RELATIVE_WITH_PARENT_REF;
416+
}
417+
else
418+
{
419+
/* Append normal dir */
420+
parsed = append_subdir_to_path(parsed, unparse);
421+
state = RELATIVE_WITH_N_DEPTH;
422+
pathdepth++;
423+
}
424+
break;
425+
case RELATIVE_WITH_N_DEPTH:
426+
if (is_double_dot)
427+
{
428+
/* Remove last parsed dir */
429+
*parsed = '\0';
430+
parsed = trim_directory(path);
431+
if (--pathdepth == 0)
432+
{
433+
/*
434+
* If the output path is now empty, we're back to the
435+
* INIT state. However, we could have processed a
436+
* path like "../dir/.." and now be down to "..", in
437+
* which case enter the correct state for that.
438+
*/
439+
if (parsed == spath)
440+
state = RELATIVE_PATH_INIT;
441+
else
442+
state = RELATIVE_WITH_PARENT_REF;
443+
}
444+
}
445+
else
446+
{
447+
/* Append normal dir */
448+
*parsed++ = '/';
449+
parsed = append_subdir_to_path(parsed, unparse);
450+
pathdepth++;
451+
}
452+
break;
453+
case RELATIVE_WITH_PARENT_REF:
454+
if (is_double_dot)
455+
{
456+
/* Append next irreducible double-dot (..) */
457+
*parsed++ = '/';
458+
parsed = append_subdir_to_path(parsed, unparse);
459+
}
460+
else
461+
{
462+
/* Append normal dir */
463+
*parsed++ = '/';
464+
parsed = append_subdir_to_path(parsed, unparse);
465+
466+
/*
467+
* We can now start counting normal dirs. But if later
468+
* double-dots make us remove this dir again, we'd better
469+
* revert to RELATIVE_WITH_PARENT_REF not INIT state.
470+
*/
471+
state = RELATIVE_WITH_N_DEPTH;
472+
pathdepth = 1;
473+
}
474+
break;
349475
}
350-
else
351-
break;
352-
}
353476

354-
if (pending_strips > 0)
355-
{
356-
/*
357-
* We could only get here if path is now totally empty (other than a
358-
* possible drive specifier on Windows). We have to put back one or
359-
* more ".."'s that we took off.
360-
*/
361-
while (--pending_strips > 0)
362-
strcat(path, "../");
363-
strcat(path, "..");
477+
unparse = unparse_next;
364478
}
479+
480+
/*
481+
* If our output path is empty at this point, insert ".". We don't want
482+
* to do this any earlier because it'd result in an extra dot in corner
483+
* cases such as "../dir/..". Since we rejected the wholly-empty-path
484+
* case above, there is certainly room.
485+
*/
486+
if (parsed == spath)
487+
*parsed++ = '.';
488+
489+
/* And finally, ensure the output path is nul-terminated. */
490+
*parsed = '\0';
365491
}
366492

367493
/*
@@ -865,16 +991,19 @@ get_parent_directory(char *path)
865991
* Trim trailing directory from path, that is, remove any trailing slashes,
866992
* the last pathname component, and the slash just ahead of it --- but never
867993
* remove a leading slash.
994+
*
995+
* For the convenience of canonicalize_path, the path's new end location
996+
* is returned.
868997
*/
869-
static void
998+
static char *
870999
trim_directory(char *path)
8711000
{
8721001
char *p;
8731002

8741003
path = skip_drive(path);
8751004

8761005
if (path[0] == '\0')
877-
return;
1006+
return path;
8781007

8791008
/* back up over trailing slash(es) */
8801009
for (p = path + strlen(path) - 1; IS_DIR_SEP(*p) && p > path; p--)
@@ -889,6 +1018,7 @@ trim_directory(char *path)
8891018
if (p == path && IS_DIR_SEP(*p))
8901019
p++;
8911020
*p = '\0';
1021+
return p;
8921022
}
8931023

8941024

@@ -908,3 +1038,25 @@ trim_trailing_separator(char *path)
9081038
for (p--; p > path && IS_DIR_SEP(*p); p--)
9091039
*p = '\0';
9101040
}
1041+
1042+
/*
1043+
* append_subdir_to_path
1044+
*
1045+
* Append the currently-considered subdirectory name to the output
1046+
* path in canonicalize_path. Return the new end location of the
1047+
* output path.
1048+
*
1049+
* Since canonicalize_path updates the path in-place, we must use
1050+
* memmove not memcpy, and we don't yet terminate the path with '\0'.
1051+
*/
1052+
static char *
1053+
append_subdir_to_path(char *path, char *subdir)
1054+
{
1055+
size_t len = strlen(subdir);
1056+
1057+
/* No need to copy data if path and subdir are the same. */
1058+
if (path != subdir)
1059+
memmove(path, subdir, len);
1060+
1061+
return path + len;
1062+
}

src/test/regress/expected/create_function_1.out

+4
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,7 @@ CREATE FUNCTION int44out(city_budget)
2828
AS :'regresslib'
2929
LANGUAGE C STRICT IMMUTABLE;
3030
NOTICE: argument type city_budget is only a shell
31+
CREATE FUNCTION test_canonicalize_path(text)
32+
RETURNS text
33+
AS :'regresslib'
34+
LANGUAGE C STRICT IMMUTABLE;

0 commit comments

Comments
 (0)