Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 23f10b6

Browse files
committed
SP-GiST support of the range adjacent operator -|-
Alexander Korotkov, reviewed by Jeff Davis.
1 parent 2443a26 commit 23f10b6

File tree

5 files changed

+204
-58
lines changed

5 files changed

+204
-58
lines changed

src/backend/utils/adt/rangetypes.c

Lines changed: 62 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,64 @@ range_after(PG_FUNCTION_ARGS)
709709
PG_RETURN_BOOL(range_after_internal(typcache, r1, r2));
710710
}
711711

712+
/*
713+
* Check if two bounds A and B are "adjacent", where A is an upper bound and B
714+
* is a lower bound. For the bounds to be adjacent, each subtype value must
715+
* satisfy strictly one of the bounds: there are no values which satisfy both
716+
* bounds (i.e. less than A and greater than B); and there are no values which
717+
* satisfy neither bound (i.e. greater than A and less than B).
718+
*
719+
* For discrete ranges, we rely on the canonicalization function to see if A..B
720+
* normalizes to empty. (If there is no canonicalization function, it's
721+
* impossible for such a range to normalize to empty, so we needn't bother to
722+
* try.)
723+
*
724+
* If A == B, the ranges are adjacent only if the bounds have different
725+
* inclusive flags (i.e., exactly one of the ranges includes the common
726+
* boundary point).
727+
*
728+
* And if A > B then the ranges are not adjacent in this order.
729+
*/
730+
bool
731+
bounds_adjacent(TypeCacheEntry *typcache, RangeBound boundA, RangeBound boundB)
732+
{
733+
int cmp;
734+
735+
Assert(!boundA.lower && boundB.lower);
736+
737+
cmp = range_cmp_bound_values(typcache, &boundA, &boundB);
738+
if (cmp < 0)
739+
{
740+
RangeType *r;
741+
742+
/*
743+
* Bounds do not overlap; see if there are points in between.
744+
*/
745+
746+
/* in a continuous subtype, there are assumed to be points between */
747+
if (!OidIsValid(typcache->rng_canonical_finfo.fn_oid))
748+
return false;
749+
750+
/*
751+
* The bounds are of a discrete range type; so make a range A..B and
752+
* see if it's empty.
753+
*/
754+
755+
/* flip the inclusion flags */
756+
boundA.inclusive = !boundA.inclusive;
757+
boundB.inclusive = !boundB.inclusive;
758+
/* change upper/lower labels to avoid Assert failures */
759+
boundA.lower = true;
760+
boundB.lower = false;
761+
r = make_range(typcache, &boundA, &boundB, false);
762+
return RangeIsEmpty(r);
763+
}
764+
else if (cmp == 0)
765+
return boundA.inclusive != boundB.inclusive;
766+
else
767+
return false; /* bounds overlap */
768+
}
769+
712770
/* adjacent to (but not overlapping)? (internal version) */
713771
bool
714772
range_adjacent_internal(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
@@ -719,8 +777,6 @@ range_adjacent_internal(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
719777
upper2;
720778
bool empty1,
721779
empty2;
722-
RangeType *r3;
723-
int cmp;
724780

725781
/* Different types should be prevented by ANYRANGE matching rules */
726782
if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
@@ -734,62 +790,11 @@ range_adjacent_internal(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
734790
return false;
735791

736792
/*
737-
* Given two ranges A..B and C..D, where B < C, the ranges are adjacent if
738-
* and only if the range B..C is empty, where inclusivity of these two
739-
* bounds is inverted compared to the original bounds. For discrete
740-
* ranges, we have to rely on the canonicalization function to normalize
741-
* B..C to empty if it contains no elements of the subtype. (If there is
742-
* no canonicalization function, it's impossible for such a range to
743-
* normalize to empty, so we needn't bother to try.)
744-
*
745-
* If B == C, the ranges are adjacent only if these bounds have different
746-
* inclusive flags (i.e., exactly one of the ranges includes the common
747-
* boundary point).
748-
*
749-
* And if B > C then the ranges cannot be adjacent in this order, but we
750-
* must consider the other order (i.e., check D <= A).
793+
* Given two ranges A..B and C..D, the ranges are adjacent if and only if
794+
* B is adjacent to C, or D is adjacent to A.
751795
*/
752-
cmp = range_cmp_bound_values(typcache, &upper1, &lower2);
753-
if (cmp < 0)
754-
{
755-
/* in a continuous subtype, there are assumed to be points between */
756-
if (!OidIsValid(typcache->rng_canonical_finfo.fn_oid))
757-
return (false);
758-
/* flip the inclusion flags */
759-
upper1.inclusive = !upper1.inclusive;
760-
lower2.inclusive = !lower2.inclusive;
761-
/* change upper/lower labels to avoid Assert failures */
762-
upper1.lower = true;
763-
lower2.lower = false;
764-
r3 = make_range(typcache, &upper1, &lower2, false);
765-
return RangeIsEmpty(r3);
766-
}
767-
if (cmp == 0)
768-
{
769-
return (upper1.inclusive != lower2.inclusive);
770-
}
771-
772-
cmp = range_cmp_bound_values(typcache, &upper2, &lower1);
773-
if (cmp < 0)
774-
{
775-
/* in a continuous subtype, there are assumed to be points between */
776-
if (!OidIsValid(typcache->rng_canonical_finfo.fn_oid))
777-
return (false);
778-
/* flip the inclusion flags */
779-
upper2.inclusive = !upper2.inclusive;
780-
lower1.inclusive = !lower1.inclusive;
781-
/* change upper/lower labels to avoid Assert failures */
782-
upper2.lower = true;
783-
lower1.lower = false;
784-
r3 = make_range(typcache, &upper2, &lower1, false);
785-
return RangeIsEmpty(r3);
786-
}
787-
if (cmp == 0)
788-
{
789-
return (upper2.inclusive != lower1.inclusive);
790-
}
791-
792-
return false;
796+
return (bounds_adjacent(typcache, upper1, lower2) ||
797+
bounds_adjacent(typcache, upper2, lower1));
793798
}
794799

795800
/* adjacent to (but not overlapping)? */

src/backend/utils/adt/rangetypes_spgist.c

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,16 @@ spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
305305
int which;
306306
int i;
307307

308+
/*
309+
* For adjacent search we need also previous centroid (if any) to improve
310+
* the precision of the consistent check. In this case needPrevious flag is
311+
* set and centroid is passed into reconstructedValues. This is not the
312+
* intended purpose of reconstructedValues (because we already have the
313+
* full value available at the leaf), but it's a convenient place to store
314+
* state while traversing the tree.
315+
*/
316+
bool needPrevious = false;
317+
308318
if (in->allTheSame)
309319
{
310320
/* Report that all nodes should be visited */
@@ -351,6 +361,7 @@ spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
351361
case RANGESTRAT_OVERLAPS:
352362
case RANGESTRAT_OVERRIGHT:
353363
case RANGESTRAT_AFTER:
364+
case RANGESTRAT_ADJACENT:
354365
/* These strategies return false if any argument is empty */
355366
if (empty)
356367
which = 0;
@@ -435,6 +446,9 @@ spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
435446
/* Are the restrictions on range bounds inclusive? */
436447
bool inclusive = true;
437448
bool strictEmpty = true;
449+
int cmp,
450+
which1,
451+
which2;
438452

439453
strategy = in->scankeys[i].sk_strategy;
440454

@@ -522,6 +536,118 @@ spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
522536
inclusive = false;
523537
break;
524538

539+
case RANGESTRAT_ADJACENT:
540+
if (empty)
541+
break; /* Skip to strictEmpty check. */
542+
543+
/*
544+
* which1 is bitmask for possibility to be adjacent with
545+
* lower bound of argument. which2 is bitmask for
546+
* possibility to be adjacent with upper bound of argument.
547+
*/
548+
which1 = which2 = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
549+
550+
/*
551+
* Previously selected quadrant could exclude possibility
552+
* for lower or upper bounds to be adjacent. Deserialize
553+
* previous centroid range if present for checking this.
554+
*/
555+
if (in->reconstructedValue != (Datum) 0)
556+
{
557+
RangeType *prevCentroid;
558+
RangeBound prevLower,
559+
prevUpper;
560+
bool prevEmpty;
561+
int cmp1,
562+
cmp2;
563+
564+
prevCentroid = DatumGetRangeType(in->reconstructedValue);
565+
range_deserialize(typcache, prevCentroid,
566+
&prevLower, &prevUpper, &prevEmpty);
567+
568+
/*
569+
* Check if lower bound of argument is not in a
570+
* quadrant we visited in the previous step.
571+
*/
572+
cmp1 = range_cmp_bounds(typcache, &lower, &prevUpper);
573+
cmp2 = range_cmp_bounds(typcache, &centroidUpper,
574+
&prevUpper);
575+
if ((cmp2 < 0 && cmp1 > 0) || (cmp2 > 0 && cmp1 < 0))
576+
which1 = 0;
577+
578+
/*
579+
* Check if upper bound of argument is not in a
580+
* quadrant we visited in the previous step.
581+
*/
582+
cmp1 = range_cmp_bounds(typcache, &upper, &prevLower);
583+
cmp2 = range_cmp_bounds(typcache, &centroidLower,
584+
&prevLower);
585+
if ((cmp2 < 0 && cmp1 > 0) || (cmp2 > 0 && cmp1 < 0))
586+
which2 = 0;
587+
}
588+
589+
if (which1)
590+
{
591+
/*
592+
* For a range's upper bound to be adjacent to the
593+
* argument's lower bound, it will be found along the
594+
* line adjacent to (and just below) Y=lower.
595+
* Therefore, if the argument's lower bound is less
596+
* than the centroid's upper bound, the line falls in
597+
* quadrants 2 and 3; if greater, the line falls in
598+
* quadrants 1 and 4.
599+
*
600+
* The above is true even when the argument's lower
601+
* bound is greater and adjacent to the centroid's
602+
* upper bound. If the argument's lower bound is
603+
* greater than the centroid's upper bound, then the
604+
* lowest value that an adjacent range could have is
605+
* that of the centroid's upper bound, which still
606+
* falls in quadrants 1 and 4.
607+
*
608+
* In the edge case, where the argument's lower bound
609+
* is equal to the cetroid's upper bound, there may be
610+
* adjacent ranges in any quadrant.
611+
*/
612+
cmp = range_cmp_bounds(typcache, &lower,
613+
&centroidUpper);
614+
if (cmp < 0)
615+
which1 &= (1 << 2) | (1 << 3);
616+
else if (cmp > 0)
617+
which1 &= (1 << 1) | (1 << 4);
618+
}
619+
620+
if (which2)
621+
{
622+
/*
623+
* For a range's lower bound to be adjacent to the
624+
* argument's upper bound, it will be found along the
625+
* line adjacent to (and just right of)
626+
* X=upper. Therefore, if the argument's upper bound is
627+
* less than (and not adjacent to) the centroid's upper
628+
* bound, the line falls in quadrants 3 and 4; if
629+
* greater or equal to, the line falls in quadrants 1
630+
* and 2.
631+
*
632+
* The edge case is when the argument's upper bound is
633+
* less than and adjacent to the centroid's lower
634+
* bound. In that case, adjacent ranges may be in any
635+
* quadrant.
636+
*/
637+
cmp = range_cmp_bounds(typcache, &lower,
638+
&centroidUpper);
639+
if (cmp < 0 &&
640+
!bounds_adjacent(typcache, upper, centroidLower))
641+
which1 &= (1 << 3) | (1 << 4);
642+
else if (cmp > 0)
643+
which1 &= (1 << 1) | (1 << 2);
644+
}
645+
646+
which &= which1 | which2;
647+
648+
needPrevious = true;
649+
break;
650+
525651
case RANGESTRAT_CONTAINS:
526652
/*
527653
* Non-empty range A contains non-empty range B if lower
@@ -652,11 +778,18 @@ spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
652778

653779
/* We must descend into the quadrant(s) identified by 'which' */
654780
out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
781+
if (needPrevious)
782+
out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
655783
out->nNodes = 0;
656784
for (i = 1; i <= in->nNodes; i++)
657785
{
658786
if (which & (1 << i))
787+
{
788+
/* Save previous prefix if needed */
789+
if (needPrevious)
790+
out->reconstructedValues[out->nNodes] = in->prefixDatum;
659791
out->nodeNumbers[out->nNodes++] = i - 1;
792+
}
660793
}
661794

662795
PG_RETURN_VOID();
@@ -713,6 +846,10 @@ spg_range_quad_leaf_consistent(PG_FUNCTION_ARGS)
713846
res = range_after_internal(typcache, leafRange,
714847
DatumGetRangeType(keyDatum));
715848
break;
849+
case RANGESTRAT_ADJACENT:
850+
res = range_adjacent_internal(typcache, leafRange,
851+
DatumGetRangeType(keyDatum));
852+
break;
716853
case RANGESTRAT_CONTAINS:
717854
res = range_contains_internal(typcache, leafRange,
718855
DatumGetRangeType(keyDatum));

src/include/catalog/pg_amop.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -775,6 +775,7 @@ DATA(insert ( 3474 3831 3831 2 s 3895 4000 0 ));
775775
DATA(insert ( 3474 3831 3831 3 s 3888 4000 0 ));
776776
DATA(insert ( 3474 3831 3831 4 s 3896 4000 0 ));
777777
DATA(insert ( 3474 3831 3831 5 s 3894 4000 0 ));
778+
DATA(insert ( 3474 3831 3831 6 s 3897 4000 0 ));
778779
DATA(insert ( 3474 3831 3831 7 s 3890 4000 0 ));
779780
DATA(insert ( 3474 3831 3831 8 s 3892 4000 0 ));
780781
DATA(insert ( 3474 3831 2283 16 s 3889 4000 0 ));

src/include/utils/rangetypes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ extern int range_cmp_bounds(TypeCacheEntry *typcache, RangeBound *b1,
201201
RangeBound *b2);
202202
extern int range_cmp_bound_values(TypeCacheEntry *typcache, RangeBound *b1,
203203
RangeBound *b2);
204+
extern bool bounds_adjacent(TypeCacheEntry *typcache, RangeBound bound1,
205+
RangeBound bound2);
204206
extern RangeType *make_empty_range(TypeCacheEntry *typcache);
205207

206208
/* GiST support (in rangetypes_gist.c) */

src/test/regress/expected/opr_sanity.out

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,7 @@ ORDER BY 1, 2, 3;
10761076
4000 | 4 | ~>=~
10771077
4000 | 5 | >>
10781078
4000 | 5 | ~>~
1079+
4000 | 6 | -|-
10791080
4000 | 6 | ~=
10801081
4000 | 7 | @>
10811082
4000 | 8 | <@
@@ -1087,7 +1088,7 @@ ORDER BY 1, 2, 3;
10871088
4000 | 15 | >
10881089
4000 | 16 | @>
10891090
4000 | 18 | =
1090-
(61 rows)
1091+
(62 rows)
10911092

10921093
-- Check that all opclass search operators have selectivity estimators.
10931094
-- This is not absolutely required, but it seems a reasonable thing

0 commit comments

Comments
 (0)