15
15
*
16
16
*
17
17
* IDENTIFICATION
18
- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
18
+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.168 2004/12/02 02:45:07 tgl Exp $
19
19
*
20
20
*-------------------------------------------------------------------------
21
21
*/
@@ -3459,6 +3459,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
3459
3459
char * match ;
3460
3460
int pos ,
3461
3461
match_pos ,
3462
+ prev_pos ,
3463
+ prev_match_pos ,
3462
3464
paren_depth ;
3463
3465
char * patt ;
3464
3466
char * rest ;
@@ -3519,11 +3521,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
3519
3521
3520
3522
/* OK, allocate space for pattern */
3521
3523
match = palloc (strlen (patt ) + 1 );
3522
- match_pos = 0 ;
3524
+ prev_match_pos = match_pos = 0 ;
3523
3525
3524
3526
/* note start at pos 1 to skip leading ^ */
3525
- for (pos = 1 ; patt [pos ]; pos ++ )
3527
+ for (prev_pos = pos = 1 ; patt [pos ]; )
3526
3528
{
3529
+ int len ;
3530
+
3527
3531
/*
3528
3532
* Check for characters that indicate multiple possible matches
3529
3533
* here. XXX I suspect isalpha() is not an adequately
@@ -3537,6 +3541,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
3537
3541
(case_insensitive && isalpha ((unsigned char ) patt [pos ])))
3538
3542
break ;
3539
3543
3544
+ /*
3545
+ * In AREs, backslash followed by alphanumeric is an escape, not
3546
+ * a quoted character. Must treat it as having multiple possible
3547
+ * matches.
3548
+ */
3549
+ if (patt [pos ] == '\\' && isalnum ((unsigned char ) patt [pos + 1 ]))
3550
+ break ;
3551
+
3540
3552
/*
3541
3553
* Check for quantifiers. Except for +, this means the preceding
3542
3554
* character is optional, so we must remove it from the prefix
@@ -3546,14 +3558,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
3546
3558
patt [pos ] == '?' ||
3547
3559
patt [pos ] == '{' )
3548
3560
{
3549
- if (match_pos > 0 )
3550
- match_pos -- ;
3551
- pos -- ;
3561
+ match_pos = prev_match_pos ;
3562
+ pos = prev_pos ;
3552
3563
break ;
3553
3564
}
3554
3565
if (patt [pos ] == '+' )
3555
3566
{
3556
- pos -- ;
3567
+ pos = prev_pos ;
3557
3568
break ;
3558
3569
}
3559
3570
if (patt [pos ] == '\\' )
@@ -3563,7 +3574,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
3563
3574
if (patt [pos ] == '\0' )
3564
3575
break ;
3565
3576
}
3566
- match [match_pos ++ ] = patt [pos ];
3577
+ /* save position in case we need to back up on next loop cycle */
3578
+ prev_match_pos = match_pos ;
3579
+ prev_pos = pos ;
3580
+ /* must use encoding-aware processing here */
3581
+ len = pg_mblen (& patt [pos ]);
3582
+ memcpy (& match [match_pos ], & patt [pos ], len );
3583
+ match_pos += len ;
3584
+ pos += len ;
3567
3585
}
3568
3586
3569
3587
match [match_pos ] = '\0' ;
0 commit comments