Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d9d769f

Browse files
Issue python#23573: Increased performance of string search operations (str.find,
str.index, str.count, the in operator, str.split, str.partition) with arguments of different kinds (UCS1, UCS2, UCS4).
1 parent f7ef475 commit d9d769f

File tree

6 files changed

+248
-193
lines changed

6 files changed

+248
-193
lines changed

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,10 @@ Release date: 2015-03-28
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #23573: Increased performance of string search operations (str.find,
14+
str.index, str.count, the in operator, str.split, str.partition) with
15+
arguments of different kinds (UCS1, UCS2, UCS4).
16+
1317
- Issue #23753: Python doesn't support anymore platforms without stat() or
1418
fstat(), these functions are always required.
1519

Objects/bytearrayobject.c

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1142,7 +1142,7 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
11421142
char byte;
11431143
Py_buffer subbuf;
11441144
const char *sub;
1145-
Py_ssize_t sub_len;
1145+
Py_ssize_t len, sub_len;
11461146
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
11471147
Py_ssize_t res;
11481148

@@ -1161,15 +1161,30 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
11611161
sub = &byte;
11621162
sub_len = 1;
11631163
}
1164+
len = PyByteArray_GET_SIZE(self);
11641165

1165-
if (dir > 0)
1166-
res = stringlib_find_slice(
1167-
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1168-
sub, sub_len, start, end);
1169-
else
1170-
res = stringlib_rfind_slice(
1171-
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1172-
sub, sub_len, start, end);
1166+
ADJUST_INDICES(start, end, len);
1167+
if (end - start < sub_len)
1168+
res = -1;
1169+
else if (sub_len == 1) {
1170+
unsigned char needle = *sub;
1171+
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
1172+
res = stringlib_fastsearch_memchr_1char(
1173+
PyByteArray_AS_STRING(self) + start, end - start,
1174+
needle, needle, mode);
1175+
if (res >= 0)
1176+
res += start;
1177+
}
1178+
else {
1179+
if (dir > 0)
1180+
res = stringlib_find_slice(
1181+
PyByteArray_AS_STRING(self), len,
1182+
sub, sub_len, start, end);
1183+
else
1184+
res = stringlib_rfind_slice(
1185+
PyByteArray_AS_STRING(self), len,
1186+
sub, sub_len, start, end);
1187+
}
11731188

11741189
if (subobj)
11751190
PyBuffer_Release(&subbuf);

Objects/bytesobject.c

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1914,7 +1914,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
19141914
char byte;
19151915
Py_buffer subbuf;
19161916
const char *sub;
1917-
Py_ssize_t sub_len;
1917+
Py_ssize_t len, sub_len;
19181918
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
19191919
Py_ssize_t res;
19201920

@@ -1933,15 +1933,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
19331933
sub = &byte;
19341934
sub_len = 1;
19351935
}
1936+
len = PyBytes_GET_SIZE(self);
19361937

1937-
if (dir > 0)
1938-
res = stringlib_find_slice(
1939-
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1940-
sub, sub_len, start, end);
1941-
else
1942-
res = stringlib_rfind_slice(
1943-
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1944-
sub, sub_len, start, end);
1938+
ADJUST_INDICES(start, end, len);
1939+
if (end - start < sub_len)
1940+
res = -1;
1941+
else if (sub_len == 1) {
1942+
unsigned char needle = *sub;
1943+
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
1944+
res = stringlib_fastsearch_memchr_1char(
1945+
PyBytes_AS_STRING(self) + start, end - start,
1946+
needle, needle, mode);
1947+
if (res >= 0)
1948+
res += start;
1949+
}
1950+
else {
1951+
if (dir > 0)
1952+
res = stringlib_find_slice(
1953+
PyBytes_AS_STRING(self), len,
1954+
sub, sub_len, start, end);
1955+
else
1956+
res = stringlib_rfind_slice(
1957+
PyBytes_AS_STRING(self), len,
1958+
sub, sub_len, start, end);
1959+
}
19451960

19461961
if (subobj)
19471962
PyBuffer_Release(&subbuf);

Objects/stringlib/fastsearch.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
Py_LOCAL_INLINE(Py_ssize_t)
3737
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
3838
STRINGLIB_CHAR ch, unsigned char needle,
39-
Py_ssize_t maxcount, int mode)
39+
int mode)
4040
{
4141
if (mode == FAST_SEARCH) {
4242
const STRINGLIB_CHAR *ptr = s;
@@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
115115
if (needle != 0)
116116
#endif
117117
return STRINGLIB(fastsearch_memchr_1char)
118-
(s, n, p[0], needle, maxcount, mode);
118+
(s, n, p[0], needle, mode);
119119
}
120120
if (mode == FAST_COUNT) {
121121
for (i = 0; i < n; i++)

Objects/stringlib/find.h

Lines changed: 2 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
1111
{
1212
Py_ssize_t pos;
1313

14-
if (str_len < 0)
15-
return -1;
14+
assert(str_len >= 0);
1615
if (sub_len == 0)
1716
return offset;
1817

@@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
3130
{
3231
Py_ssize_t pos;
3332

34-
if (str_len < 0)
35-
return -1;
33+
assert(str_len >= 0);
3634
if (sub_len == 0)
3735
return str_len + offset;
3836

@@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
4442
return pos;
4543
}
4644

47-
/* helper macro to fixup start/end slice values */
48-
#define ADJUST_INDICES(start, end, len) \
49-
if (end > len) \
50-
end = len; \
51-
else if (end < 0) { \
52-
end += len; \
53-
if (end < 0) \
54-
end = 0; \
55-
} \
56-
if (start < 0) { \
57-
start += len; \
58-
if (start < 0) \
59-
start = 0; \
60-
}
61-
6245
Py_LOCAL_INLINE(Py_ssize_t)
6346
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
6447
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
6548
Py_ssize_t start, Py_ssize_t end)
6649
{
67-
ADJUST_INDICES(start, end, str_len);
6850
return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
6951
}
7052

@@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
7355
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
7456
Py_ssize_t start, Py_ssize_t end)
7557
{
76-
ADJUST_INDICES(start, end, str_len);
7758
return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
7859
}
7960

0 commit comments

Comments
 (0)