Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2341636

Browse files
committed
GIN improvements
- Replace sorted array of entries in maintenance_work_mem to binary tree, this should improve create performance. - More precisely calculate allocated memory, eliminate leaks with user-defined extractValue() - Improve wordings in tsearch2
1 parent fa60135 commit 2341636

File tree

4 files changed

+221
-74
lines changed

4 files changed

+221
-74
lines changed

contrib/tsearch2/ts_cfg.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -313,12 +313,12 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
313313
#ifdef IGNORE_LONGLEXEME
314314
ereport(NOTICE,
315315
(errcode(ERRCODE_SYNTAX_ERROR),
316-
errmsg("word is too long")));
316+
errmsg("A word you are indexing is too long. It will be ignored.")));
317317
continue;
318318
#else
319319
ereport(ERROR,
320320
(errcode(ERRCODE_SYNTAX_ERROR),
321-
errmsg("word is too long")));
321+
errmsg("A word you are indexing is too long")));
322322
#endif
323323
}
324324

@@ -470,12 +470,12 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
470470
#ifdef IGNORE_LONGLEXEME
471471
ereport(NOTICE,
472472
(errcode(ERRCODE_SYNTAX_ERROR),
473-
errmsg("word is too long")));
473+
errmsg("A word you are indexing is too long. It will be ignored.")));
474474
continue;
475475
#else
476476
ereport(ERROR,
477477
(errcode(ERRCODE_SYNTAX_ERROR),
478-
errmsg("word is too long")));
478+
errmsg("A word you are indexing is too long")));
479479
#endif
480480
}
481481

src/backend/access/gin/ginbulk.c

Lines changed: 191 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.2 2006/07/11 16:55:34 teodor Exp $
1212
*-------------------------------------------------------------------------
1313
*/
1414

@@ -22,17 +22,29 @@
2222
#include "utils/memutils.h"
2323
#include "access/tuptoaster.h"
2424

25-
#define DEF_NENTRY 128
25+
#define DEF_NENTRY 2048
2626
#define DEF_NPTR 4
2727

2828
void
2929
ginInitBA(BuildAccumulator *accum) {
30-
31-
accum->number = 0;
32-
accum->curget = 0;
33-
accum->length = DEF_NENTRY;
34-
accum->entries = (EntryAccumulator*)palloc0( sizeof(EntryAccumulator) * DEF_NENTRY );
35-
accum->allocatedMemory = sizeof(EntryAccumulator) * DEF_NENTRY;
30+
accum->maxdepth = 1;
31+
accum->stackpos = 0;
32+
accum->entries = NULL;
33+
accum->stack = NULL;
34+
accum->allocatedMemory = 0;
35+
accum->entryallocator = NULL;
36+
}
37+
38+
static EntryAccumulator*
39+
EAAllocate( BuildAccumulator *accum ) {
40+
if ( accum->entryallocator == NULL || accum->length>=DEF_NENTRY ) {
41+
accum->entryallocator = palloc(sizeof(EntryAccumulator)*DEF_NENTRY);
42+
accum->allocatedMemory += sizeof(EntryAccumulator)*DEF_NENTRY;
43+
accum->length = 0;
44+
}
45+
46+
accum->length++;
47+
return accum->entryallocator + accum->length - 1;
3648
}
3749

3850
/*
@@ -61,64 +73,133 @@ ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heap
6173
entry->number++;
6274
}
6375

76+
static Datum
77+
getDatumCopy(BuildAccumulator *accum, Datum value) {
78+
Form_pg_attribute *att = accum->ginstate->tupdesc->attrs;
79+
Datum newvalue;
80+
int data_length = 0;
81+
void *ptr;
82+
83+
if ( att[0]->attbyval ) {
84+
store_att_byval(&newvalue, value, att[0]->attlen);
85+
} else {
86+
/* pass-by-reference */
87+
if (att[0]->attlen == -1) {
88+
/* varlena */
89+
data_length = VARATT_SIZE(DatumGetPointer(value));
90+
} else if (att[0]->attlen == -2) {
91+
/* c-string */
92+
data_length = strlen(DatumGetCString(value)) + 1;
93+
} else {
94+
/* fixed-length pass-by-reference */
95+
Assert(att[0]->attlen > 0);
96+
data_length = att[0]->attlen;
97+
}
98+
99+
ptr = palloc( data_length );
100+
memcpy(ptr, DatumGetPointer(value), data_length);
101+
newvalue = PointerGetDatum(ptr);
102+
}
103+
104+
accum->allocatedMemory+=data_length;
105+
106+
return newvalue;
107+
}
108+
64109
/*
65110
* Find/store one entry from indexed value.
66-
* It supposes, that entry should be located between low and end of array of
67-
* entries. Returns position of found/inserted entry
68111
*/
69-
static uint32
70-
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry, uint32 low) {
71-
uint32 high = accum->number, mid;
72-
int res;
73-
74-
while(high>low) {
75-
mid = low + ((high - low) / 2);
76-
77-
res = compareEntries(accum->ginstate, entry, accum->entries[mid].value);
78-
79-
if ( res == 0 ) {
80-
ginInsertData( accum, accum->entries+mid, heapptr );
81-
return mid;
82-
} else if ( res > 0 )
83-
low = mid + 1;
84-
else
85-
high = mid;
112+
static void
113+
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry) {
114+
EntryAccumulator *ea = accum->entries, *pea = NULL;
115+
int res = 0;
116+
uint32 depth = 1;
117+
118+
while( ea ) {
119+
res = compareEntries(accum->ginstate, entry, ea->value);
120+
if ( res == 0 )
121+
break; /* found */
122+
else {
123+
pea = ea;
124+
if ( res < 0 )
125+
ea = ea->left;
126+
else
127+
ea = ea->right;
128+
}
129+
depth++;
86130
}
87131

88-
/* did not find an entry, insert */
89-
if ( accum->number >= accum->length ) {
90-
accum->allocatedMemory += sizeof(EntryAccumulator) * accum->length;
91-
accum->length *= 2;
92-
accum->entries = (EntryAccumulator*)repalloc( accum->entries,
93-
sizeof(EntryAccumulator) * accum->length );
94-
}
95-
96-
if ( high != accum->number )
97-
memmove( accum->entries+high+1, accum->entries+high, sizeof(EntryAccumulator) * (accum->number-high) );
132+
if ( depth > accum->maxdepth )
133+
accum->maxdepth = depth;
98134

99-
accum->entries[high].value = entry;
100-
accum->entries[high].length = DEF_NPTR;
101-
accum->entries[high].number = 1;
102-
accum->entries[high].shouldSort = FALSE;
103-
accum->entries[high].list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
104-
accum->entries[high].list[0] = *heapptr;
135+
if ( ea == NULL ) {
136+
ea = EAAllocate(accum);
105137

106-
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
107-
accum->number++;
138+
ea->left = ea->right = NULL;
139+
ea->value = getDatumCopy(accum, entry);
140+
ea->length = DEF_NPTR;
141+
ea->number = 1;
142+
ea->shouldSort = FALSE;
143+
ea->list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
144+
ea->list[0] = *heapptr;
145+
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
108146

109-
return high;
147+
if ( pea == NULL )
148+
accum->entries = ea;
149+
else {
150+
Assert( res != 0 );
151+
if ( res < 0 )
152+
pea->left = ea;
153+
else
154+
pea->right = ea;
155+
}
156+
} else
157+
ginInsertData( accum, ea, heapptr );
110158
}
111159

160+
/*
161+
* insert middle of left part the middle of right one,
162+
* then calls itself for each parts
163+
*/
164+
static void
165+
ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry,
166+
uint32 low, uint32 high, uint32 offset) {
167+
uint32 pos;
168+
uint32 middle = (low+high)>>1;
169+
170+
pos = (low+middle)>>1;
171+
if ( low!=middle && pos>=offset && pos-offset < nentry )
172+
ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
173+
pos = (high+middle+1)>>1;
174+
if ( middle+1 != high && pos>=offset && pos-offset < nentry )
175+
ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
176+
177+
if ( low!=middle )
178+
ginChooseElem(accum, heapptr, entries, nentry, low, middle, offset );
179+
if ( high!=middle+1 )
180+
ginChooseElem(accum, heapptr, entries, nentry, middle+1, high, offset );
181+
}
112182

113183
/*
114-
* Insert one heap pointer. Requires entries to be sorted!
184+
* Insert one heap pointer. Suppose entries is sorted.
185+
* Insertion order trys to get binary tree balanced: first insert middle value,
186+
* next middle on left part and middle of right part.
115187
*/
116188
void
117189
ginInsertRecordBA( BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry ) {
118-
uint32 start=0,i;
190+
uint32 i, nbit=0, offset;
191+
192+
if (nentry==0)
193+
return;
194+
195+
i=nentry-1;
196+
for(;i>0;i>>=1) nbit++;
119197

120-
for(i=0;i<nentry;i++)
121-
start = ginInsertEntry( accum, heapptr, entries[i], start);
198+
nbit = 1<<nbit;
199+
offset = (nbit-nentry)/2;
200+
201+
ginInsertEntry( accum, heapptr, entries[ (nbit>>1)-offset ]);
202+
ginChooseElem(accum, heapptr, entries, nentry, 0, nbit, offset);
122203
}
123204

124205
static int
@@ -128,28 +209,79 @@ qsortCompareItemPointers( const void *a, const void *b ) {
128209
return res;
129210
}
130211

212+
/*
213+
* walk on binary tree and returns ordered nodes
214+
*/
215+
static EntryAccumulator*
216+
walkTree( BuildAccumulator *accum ) {
217+
EntryAccumulator *entry = accum->stack[ accum->stackpos ];
218+
219+
if ( entry->list != NULL ) {
220+
/* return entry itself: we already was at left sublink */
221+
return entry;
222+
} else if ( entry->right && entry->right != accum->stack[ accum->stackpos+1 ] ) {
223+
/* go on right sublink */
224+
accum->stackpos++;
225+
entry = entry->right;
226+
227+
/* find most-left value */
228+
for(;;) {
229+
accum->stack[ accum->stackpos ] = entry;
230+
if ( entry->left ) {
231+
accum->stackpos++;
232+
entry = entry->left;
233+
} else
234+
break;
235+
}
236+
} else {
237+
/* we already return all left subtree, itself and right subtree */
238+
if ( accum->stackpos == 0 )
239+
return 0;
240+
accum->stackpos--;
241+
return walkTree(accum);
242+
}
243+
244+
return entry;
245+
}
246+
131247
ItemPointerData*
132248
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n) {
133249
EntryAccumulator *entry;
134-
135250
ItemPointerData *list;
136-
if ( accum->curget >= accum->number )
251+
252+
253+
if ( accum->stack == NULL ) {
254+
/* first call */
255+
accum->stack = palloc0(sizeof(EntryAccumulator*)*(accum->maxdepth+1));
256+
entry = accum->entries;
257+
258+
/* find most-left value */
259+
for(;;) {
260+
accum->stack[ accum->stackpos ] = entry;
261+
if ( entry->left ) {
262+
accum->stackpos++;
263+
entry = entry->left;
264+
} else
265+
break;
266+
}
267+
} else {
268+
pfree( accum->stack[ accum->stackpos ]->list );
269+
accum->stack[ accum->stackpos ]->list = NULL;
270+
entry = walkTree( accum );
271+
}
272+
273+
if ( entry == NULL )
137274
return NULL;
138-
else if ( accum->curget > 0 )
139-
pfree( accum->entries[ accum->curget-1 ].list );
140275

141-
entry = accum->entries + accum->curget;
142276
*n = entry->number;
143277
*value = entry->value;
144278
list = entry->list;
145-
accum->curget++;
279+
280+
Assert(list != NULL);
146281

147282
if ( entry->shouldSort && entry->number > 1 )
148283
qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers);
149284

150-
151285
return list;
152286
}
153287

154-
155-

0 commit comments

Comments
 (0)