Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 09368d2

Browse files
committed
Fix 'all at one page' bug in picksplit method of R-tree emulation. Add defense
against buggy user-defined picksplit to GiST.
1 parent 1eef90d commit 09368d2

File tree

2 files changed

+173
-52
lines changed

2 files changed

+173
-52
lines changed

src/backend/access/gist/gistproc.c

+80-41
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Portions Copyright (c) 1994, Regents of the University of California
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.15 2009/01/01 17:23:35 momjian Exp $
13+
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.16 2009/04/06 14:27:27 teodor Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -272,6 +272,69 @@ chooseLR(GIST_SPLITVEC *v,
272272
v->spl_ldatum_exists = v->spl_rdatum_exists = false;
273273
}
274274

275+
/*
276+
* Trivial split: entries in the first half (by offset number) are placed on
277+
* the left page and the rest on the right; each side's union box is built here
278+
*/
279+
static void
280+
fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
281+
{
282+
OffsetNumber i,
283+
maxoff;
284+
BOX *unionL = NULL,
285+
*unionR = NULL;
286+
int nbytes;
287+
288+
maxoff = entryvec->n - 1; /* last offset visited by the loop below */
289+
290+
nbytes = (maxoff + 2) * sizeof(OffsetNumber); /* each list has room for maxoff + 2 offsets */
291+
v->spl_left = (OffsetNumber *) palloc(nbytes);
292+
v->spl_right = (OffsetNumber *) palloc(nbytes);
293+
v->spl_nleft = v->spl_nright = 0;
294+
295+
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
296+
{
297+
BOX * cur = DatumGetBoxP(entryvec->vector[i].key);
298+
299+
if (i <= (maxoff - FirstOffsetNumber + 1) / 2) /* first half of the offsets goes left */
300+
{
301+
v->spl_left[v->spl_nleft] = i;
302+
if (unionL == NULL) /* first left entry: start the union as a copy of it */
303+
{
304+
unionL = (BOX *) palloc(sizeof(BOX));
305+
*unionL = *cur;
306+
}
307+
else
308+
adjustBox(unionL, cur); /* presumably enlarges unionL to cover cur -- confirm against adjustBox */
309+
310+
v->spl_nleft++;
311+
}
312+
else
313+
{
314+
v->spl_right[v->spl_nright] = i;
315+
if (unionR == NULL) /* first right entry: start the union as a copy of it */
316+
{
317+
unionR = (BOX *) palloc(sizeof(BOX));
318+
*unionR = *cur;
319+
}
320+
else
321+
adjustBox(unionR, cur);
322+
323+
v->spl_nright++;
324+
}
325+
}
326+
327+
if (v->spl_ldatum_exists) /* a left datum was passed in: merge it into the left union */
328+
adjustBox(unionL, DatumGetBoxP(v->spl_ldatum)); /* NOTE(review): unionL is still NULL if no entry went left -- presumably callers always pass >= 2 entries; confirm */
329+
v->spl_ldatum = BoxPGetDatum(unionL);
330+
331+
if (v->spl_rdatum_exists) /* likewise for a right datum that was passed in */
332+
adjustBox(unionR, DatumGetBoxP(v->spl_rdatum));
333+
v->spl_rdatum = BoxPGetDatum(unionR);
334+
335+
v->spl_ldatum_exists = v->spl_rdatum_exists = false; /* both incoming datums have been handled above */
336+
}
337+
275338
/*
276339
* The GiST PickSplit method
277340
*
@@ -324,52 +387,22 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
324387
adjustBox(&pageunion, cur);
325388
}
326389

327-
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
328-
listL = (OffsetNumber *) palloc(nbytes);
329-
listR = (OffsetNumber *) palloc(nbytes);
330-
unionL = (BOX *) palloc(sizeof(BOX));
331-
unionR = (BOX *) palloc(sizeof(BOX));
332390
if (allisequal)
333391
{
334-
cur = DatumGetBoxP(entryvec->vector[OffsetNumberNext(FirstOffsetNumber)].key);
335-
if (memcmp((void *) cur, (void *) &pageunion, sizeof(BOX)) == 0)
336-
{
337-
v->spl_left = listL;
338-
v->spl_right = listR;
339-
v->spl_nleft = v->spl_nright = 0;
340-
memcpy((void *) unionL, (void *) &pageunion, sizeof(BOX));
341-
memcpy((void *) unionR, (void *) &pageunion, sizeof(BOX));
342-
343-
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
344-
{
345-
if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
346-
{
347-
v->spl_left[v->spl_nleft] = i;
348-
v->spl_nleft++;
349-
}
350-
else
351-
{
352-
v->spl_right[v->spl_nright] = i;
353-
v->spl_nright++;
354-
}
355-
}
356-
357-
if (v->spl_ldatum_exists)
358-
adjustBox(unionL, DatumGetBoxP(v->spl_ldatum));
359-
v->spl_ldatum = BoxPGetDatum(unionL);
360-
361-
if (v->spl_rdatum_exists)
362-
adjustBox(unionR, DatumGetBoxP(v->spl_rdatum));
363-
v->spl_rdatum = BoxPGetDatum(unionR);
364-
365-
v->spl_ldatum_exists = v->spl_rdatum_exists = false;
366-
367-
PG_RETURN_POINTER(v);
368-
}
392+
/*
393+
* All entries are the same
394+
*/
395+
fallbackSplit(entryvec, v);
396+
PG_RETURN_POINTER(v);
369397
}
370398

399+
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
400+
listL = (OffsetNumber *) palloc(nbytes);
401+
listR = (OffsetNumber *) palloc(nbytes);
371402
listB = (OffsetNumber *) palloc(nbytes);
372403
listT = (OffsetNumber *) palloc(nbytes);
404+
unionL = (BOX *) palloc(sizeof(BOX));
405+
unionR = (BOX *) palloc(sizeof(BOX));
373406
unionB = (BOX *) palloc(sizeof(BOX));
374407
unionT = (BOX *) palloc(sizeof(BOX));
375408

@@ -452,6 +485,12 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
452485
else
453486
ADDLIST(listT, unionT, posT, i);
454487
}
488+
489+
if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB))
490+
{
491+
fallbackSplit(entryvec, v);
492+
PG_RETURN_POINTER(v);
493+
}
455494
}
456495

457496
/* which split more optimal? */

src/backend/access/gist/gistsplit.c

+93-11
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gist/gistsplit.c,v 1.7 2009/01/01 17:23:35 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gist/gistsplit.c,v 1.8 2009/04/06 14:27:27 teodor Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -280,6 +280,63 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC
280280
sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
281281
}
282282

283+
/*
284+
* Trivial picksplit implementation. This function is called only
285+
* if the user-defined picksplit puts all keys on one page.
286+
* That is a bug in the user-defined picksplit, but we'd like
287+
* to "fix" it by splitting the entries in half by offset number.
288+
*/
289+
static void
290+
genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
291+
{
292+
OffsetNumber i,
293+
maxoff;
294+
int nbytes;
295+
GistEntryVector *evec;
296+
297+
maxoff = entryvec->n - 1; /* last offset visited by the loop below */
298+
299+
nbytes = (maxoff + 2) * sizeof(OffsetNumber); /* each list has room for maxoff + 2 offsets */
300+
301+
v->spl_left = (OffsetNumber *) palloc(nbytes);
302+
v->spl_right = (OffsetNumber *) palloc(nbytes);
303+
v->spl_nleft = v->spl_nright = 0;
304+
305+
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
306+
{
307+
if (i <= (maxoff - FirstOffsetNumber + 1) / 2) /* first half of the offsets goes left */
308+
{
309+
v->spl_left[v->spl_nleft] = i;
310+
v->spl_nleft++;
311+
}
312+
else
313+
{
314+
v->spl_right[v->spl_nright] = i;
315+
v->spl_nright++;
316+
}
317+
}
318+
319+
/*
320+
* Form the union of each page by calling the column's union function on a scratch copy of that page's entries
321+
*/
322+
323+
evec = palloc( sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ ); /* scratch vector sized for all entryvec->n entries plus GEVHDRSZ */
324+
325+
evec->n = v->spl_nleft;
326+
memcpy(evec->vector, entryvec->vector + FirstOffsetNumber, /* left entries are contiguous, starting at FirstOffsetNumber */
327+
sizeof(GISTENTRY) * evec->n);
328+
v->spl_ldatum = FunctionCall2(&giststate->unionFn[attno],
329+
PointerGetDatum(evec),
330+
PointerGetDatum(&nbytes)); /* nbytes is reused as the second argument; presumably receives the result size -- confirm */
331+
332+
evec->n = v->spl_nright;
333+
memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft, /* right entries follow directly after the left ones */
334+
sizeof(GISTENTRY) * evec->n);
335+
v->spl_rdatum = FunctionCall2(&giststate->unionFn[attno],
336+
PointerGetDatum(evec),
337+
PointerGetDatum(&nbytes));
338+
}
339+
283340
/*
284341
* Calls user picksplit method for attno columns to split vector to
285342
* two vectors. May use attno+n columns data to
@@ -296,7 +353,7 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
296353

297354
/*
298355
* now let the user-defined picksplit function set up the split vector; in
299-
* entryvec have no null value!!
356+
* entryvec there is no null value!!
300357
*/
301358

302359
sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
@@ -308,18 +365,43 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
308365
PointerGetDatum(entryvec),
309366
PointerGetDatum(sv));
310367

311-
/* compatibility with old code */
312-
if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
313-
sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
314-
if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
315-
sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
368+
if ( sv->spl_nleft == 0 || sv->spl_nright == 0 )
369+
{
370+
ereport(DEBUG1,
371+
(errcode(ERRCODE_INTERNAL_ERROR),
372+
errmsg("Picksplit method for %d column of index \"%s\" failed",
373+
attno+1, RelationGetRelationName(r)),
374+
errhint("Index is not optimal, to optimize it contact developer or try to use the column as a second one in create index command")));
316375

317-
if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
376+
/*
377+
* Reinit GIST_SPLITVEC. Although those fields are not used
378+
* by genericPickSplit(), let us set them up for further processing
379+
*/
380+
sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
381+
sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
382+
sv->spl_ldatum = v->spl_lattr[attno];
383+
sv->spl_rdatum = v->spl_rattr[attno];
384+
385+
genericPickSplit(giststate, entryvec, sv, attno);
386+
387+
if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
388+
supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
389+
}
390+
else
318391
{
319-
elog(LOG, "PickSplit method of %d columns of index '%s' doesn't support secondary split",
320-
attno + 1, RelationGetRelationName(r));
392+
/* compatibility with old code */
393+
if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
394+
sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
395+
if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
396+
sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
321397

322-
supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
398+
if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
399+
{
400+
elog(LOG, "PickSplit method of %d columns of index '%s' doesn't support secondary split",
401+
attno + 1, RelationGetRelationName(r));
402+
403+
supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
404+
}
323405
}
324406

325407
v->spl_lattr[attno] = sv->spl_ldatum;

0 commit comments

Comments
 (0)