8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.21 2001/06/22 19:16:21 wieck Exp $
11
+ * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.22 2001/07/05 19:33:35 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
@@ -97,8 +97,8 @@ typedef struct
97
97
} ScalarMCVItem ;
98
98
99
99
100
- #define swapInt (a ,b ) {int _tmp; _tmp=a; a=b; b=_tmp;}
101
- #define swapDatum (a ,b ) {Datum _tmp; _tmp=a; a=b; b=_tmp;}
100
+ #define swapInt (a ,b ) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
101
+ #define swapDatum (a ,b ) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)
102
102
103
103
104
104
static int MESSAGE_LEVEL ;
@@ -111,20 +111,18 @@ static int *datumCmpTupnoLink;
111
111
112
112
static VacAttrStats * examine_attribute (Relation onerel , int attnum );
113
113
static int acquire_sample_rows (Relation onerel , HeapTuple * rows ,
114
- int targrows , long * totalrows );
114
+ int targrows , double * totalrows );
115
115
static double random_fract (void );
116
116
static double init_selection_state (int n );
117
- static long select_next_random_record (long t , int n , double * stateptr );
117
+ static double select_next_random_record (double t , int n , double * stateptr );
118
118
static int compare_rows (const void * a , const void * b );
119
119
static int compare_scalars (const void * a , const void * b );
120
120
static int compare_mcvs (const void * a , const void * b );
121
- static OffsetNumber get_page_max_offset (Relation relation ,
122
- BlockNumber blocknumber );
123
121
static void compute_minimal_stats (VacAttrStats * stats ,
124
- TupleDesc tupDesc , long totalrows ,
122
+ TupleDesc tupDesc , double totalrows ,
125
123
HeapTuple * rows , int numrows );
126
124
static void compute_scalar_stats (VacAttrStats * stats ,
127
- TupleDesc tupDesc , long totalrows ,
125
+ TupleDesc tupDesc , double totalrows ,
128
126
HeapTuple * rows , int numrows );
129
127
static void update_attstats (Oid relid , int natts , VacAttrStats * * vacattrstats );
130
128
@@ -143,7 +141,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
143
141
VacAttrStats * * vacattrstats ;
144
142
int targrows ,
145
143
numrows ;
146
- long totalrows ;
144
+ double totalrows ;
147
145
HeapTuple * rows ;
148
146
HeapTuple tuple ;
149
147
@@ -298,7 +296,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
298
296
if (!vacstmt -> vacuum )
299
297
vac_update_relstats (RelationGetRelid (onerel ),
300
298
onerel -> rd_nblocks ,
301
- ( double ) totalrows ,
299
+ totalrows ,
302
300
RelationGetForm (onerel )-> relhasindex );
303
301
304
302
/*
@@ -488,7 +486,7 @@ examine_attribute(Relation onerel, int attnum)
488
486
*/
489
487
static int
490
488
acquire_sample_rows (Relation onerel , HeapTuple * rows , int targrows ,
491
- long * totalrows )
489
+ double * totalrows )
492
490
{
493
491
int numrows = 0 ;
494
492
HeapScanDesc scan ;
@@ -499,7 +497,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
499
497
OffsetNumber lastoffset ;
500
498
int numest ;
501
499
double tuplesperpage ;
502
- long t ;
500
+ double t ;
503
501
double rstate ;
504
502
505
503
Assert (targrows > 1 );
@@ -520,7 +518,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
520
518
*/
521
519
if (!HeapTupleIsValid (tuple ))
522
520
{
523
- * totalrows = numrows ;
521
+ * totalrows = ( double ) numrows ;
524
522
return numrows ;
525
523
}
526
524
/*
@@ -565,20 +563,22 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
565
563
}
566
564
tuplesperpage = (double ) numest / (double ) estblock ;
567
565
568
- t = numrows ; /* t is the # of records processed so far */
566
+ t = ( double ) numrows ; /* t is the # of records processed so far */
569
567
rstate = init_selection_state (targrows );
570
568
for (;;)
571
569
{
572
570
double targpos ;
573
571
BlockNumber targblock ;
572
+ Buffer targbuffer ;
573
+ Page targpage ;
574
574
OffsetNumber targoffset ,
575
575
maxoffset ;
576
576
577
577
t = select_next_random_record (t , targrows , & rstate );
578
578
/* Try to read the t'th record in the table */
579
- targpos = ( double ) t / tuplesperpage ;
579
+ targpos = t / tuplesperpage ;
580
580
targblock = (BlockNumber ) targpos ;
581
- targoffset = ((int ) (targpos - targblock ) * tuplesperpage ) +
581
+ targoffset = ((int ) (( targpos - targblock ) * tuplesperpage ) ) +
582
582
FirstOffsetNumber ;
583
583
/* Make sure we are past the last selected record */
584
584
if (targblock <= lastblock )
@@ -595,21 +595,37 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
595
595
*/
596
596
if (targblock >= onerel -> rd_nblocks )
597
597
break ;
598
- maxoffset = get_page_max_offset (onerel , targblock );
598
+ /*
599
+ * We must maintain a pin on the target page's buffer to ensure that
600
+ * the maxoffset value stays good (else concurrent VACUUM might
601
+ * delete tuples out from under us). Hence, pin the page until we
602
+ * are done looking at it. We don't maintain a lock on the page,
603
+ * so tuples could get added to it, but we ignore such tuples.
604
+ */
605
+ targbuffer = ReadBuffer (onerel , targblock );
606
+ if (!BufferIsValid (targbuffer ))
607
+ elog (ERROR , "acquire_sample_rows: ReadBuffer(%s,%u) failed" ,
608
+ RelationGetRelationName (onerel ), targblock );
609
+ LockBuffer (targbuffer , BUFFER_LOCK_SHARE );
610
+ targpage = BufferGetPage (targbuffer );
611
+ maxoffset = PageGetMaxOffsetNumber (targpage );
612
+ LockBuffer (targbuffer , BUFFER_LOCK_UNLOCK );
613
+
599
614
for (;;)
600
615
{
601
616
HeapTupleData targtuple ;
602
- Buffer targbuffer ;
617
+ Buffer tupbuffer ;
603
618
604
619
if (targoffset > maxoffset )
605
620
{
606
621
/* Fell off end of this page, try next */
622
+ ReleaseBuffer (targbuffer );
607
623
targblock ++ ;
608
624
targoffset = FirstOffsetNumber ;
609
625
goto pageloop ;
610
626
}
611
627
ItemPointerSet (& targtuple .t_self , targblock , targoffset );
612
- heap_fetch (onerel , SnapshotNow , & targtuple , & targbuffer , NULL );
628
+ heap_fetch (onerel , SnapshotNow , & targtuple , & tupbuffer , NULL );
613
629
if (targtuple .t_data != NULL )
614
630
{
615
631
/*
@@ -621,6 +637,9 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
621
637
Assert (k >= 0 && k < targrows );
622
638
heap_freetuple (rows [k ]);
623
639
rows [k ] = heap_copytuple (& targtuple );
640
+ /* this releases the second pin acquired by heap_fetch: */
641
+ ReleaseBuffer (tupbuffer );
642
+ /* this releases the initial pin: */
624
643
ReleaseBuffer (targbuffer );
625
644
lastblock = targblock ;
626
645
lastoffset = targoffset ;
@@ -639,7 +658,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
639
658
/*
640
659
* Estimate total number of valid rows in relation.
641
660
*/
642
- * totalrows = ( long ) ( onerel -> rd_nblocks * tuplesperpage + 0.5 );
661
+ * totalrows = floor (( double ) onerel -> rd_nblocks * tuplesperpage + 0.5 );
643
662
644
663
return numrows ;
645
664
}
@@ -667,6 +686,12 @@ random_fract(void)
667
686
* of the last record processed and next record to process. The only extra
668
687
* state needed between calls is W, a random state variable.
669
688
*
689
+ * Note: the original algorithm defines t, S, numer, and denom as integers.
690
+ * Here we express them as doubles to avoid overflow if the number of rows
691
+ * in the table exceeds INT_MAX. The algorithm should work as long as the
692
+ * row count does not become so large that it is not represented accurately
693
+ * in a double (on IEEE-math machines this would be around 2^52 rows).
694
+ *
670
695
* init_selection_state computes the initial W value.
671
696
*
672
697
* Given that we've already processed t records (t >= n),
@@ -680,36 +705,36 @@ init_selection_state(int n)
680
705
return exp (- log (random_fract ())/n );
681
706
}
682
707
683
- static long
684
- select_next_random_record (long t , int n , double * stateptr )
708
+ static double
709
+ select_next_random_record (double t , int n , double * stateptr )
685
710
{
686
711
/* The magic constant here is T from Vitter's paper */
687
- if (t <= (22 * n ))
712
+ if (t <= (22.0 * n ))
688
713
{
689
714
/* Process records using Algorithm X until t is large enough */
690
715
double V ,
691
716
quot ;
692
717
693
718
V = random_fract (); /* Generate V */
694
- t ++ ;
695
- quot = (double ) ( t - n ) / ( double ) t ;
719
+ t += 1 ;
720
+ quot = (t - ( double ) n ) / t ;
696
721
/* Find min S satisfying (4.1) */
697
722
while (quot > V )
698
723
{
699
- t ++ ;
700
- quot *= (double ) ( t - n ) / ( double ) t ;
724
+ t += 1 ;
725
+ quot *= (t - ( double ) n ) / t ;
701
726
}
702
727
}
703
728
else
704
729
{
705
730
/* Now apply Algorithm Z */
706
731
double W = * stateptr ;
707
- long term = t - n + 1 ;
708
- int S ;
732
+ double term = t - ( double ) n + 1 ;
733
+ double S ;
709
734
710
735
for (;;)
711
736
{
712
- long numer ,
737
+ double numer ,
713
738
numer_lim ,
714
739
denom ;
715
740
double U ,
@@ -722,9 +747,9 @@ select_next_random_record(long t, int n, double *stateptr)
722
747
/* Generate U and X */
723
748
U = random_fract ();
724
749
X = t * (W - 1.0 );
725
- S = X ; /* S is tentatively set to floor(X) */
750
+ S = floor ( X ); /* S is tentatively set to floor(X) */
726
751
/* Test if U <= h(S)/cg(X) in the manner of (6.3) */
727
- tmp = (double ) ( t + 1 ) / ( double ) term ;
752
+ tmp = (t + 1 ) / term ;
728
753
lhs = exp (log (((U * tmp * tmp ) * (term + S ))/(t + X ))/n );
729
754
rhs = (((t + X )/(term + S )) * term )/t ;
730
755
if (lhs <= rhs )
@@ -734,20 +759,20 @@ select_next_random_record(long t, int n, double *stateptr)
734
759
}
735
760
/* Test if U <= f(S)/cg(X) */
736
761
y = (((U * (t + 1 ))/term ) * (t + S + 1 ))/(t + X );
737
- if (n < S )
762
+ if (( double ) n < S )
738
763
{
739
764
denom = t ;
740
765
numer_lim = term + S ;
741
766
}
742
767
else
743
768
{
744
- denom = t - n + S ;
769
+ denom = t - ( double ) n + S ;
745
770
numer_lim = t + 1 ;
746
771
}
747
- for (numer = t + S ; numer >= numer_lim ; numer -- )
772
+ for (numer = t + S ; numer >= numer_lim ; numer -= 1 )
748
773
{
749
- y *= ( double ) numer / ( double ) denom ;
750
- denom -- ;
774
+ y *= numer / denom ;
775
+ denom -= 1 ;
751
776
}
752
777
W = exp (- log (random_fract ())/n ); /* Generate W in advance */
753
778
if (exp (log (y )/n ) <= (t + X )/t )
@@ -783,30 +808,6 @@ compare_rows(const void *a, const void *b)
783
808
return 0 ;
784
809
}
785
810
786
- /*
787
- * Discover the largest valid tuple offset number on the given page
788
- *
789
- * This code probably ought to live in some other module.
790
- */
791
- static OffsetNumber
792
- get_page_max_offset (Relation relation , BlockNumber blocknumber )
793
- {
794
- Buffer buffer ;
795
- Page p ;
796
- OffsetNumber offnum ;
797
-
798
- buffer = ReadBuffer (relation , blocknumber );
799
- if (!BufferIsValid (buffer ))
800
- elog (ERROR , "get_page_max_offset: %s relation: ReadBuffer(%ld) failed" ,
801
- RelationGetRelationName (relation ), (long ) blocknumber );
802
- LockBuffer (buffer , BUFFER_LOCK_SHARE );
803
- p = BufferGetPage (buffer );
804
- offnum = PageGetMaxOffsetNumber (p );
805
- LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
806
- ReleaseBuffer (buffer );
807
- return offnum ;
808
- }
809
-
810
811
811
812
/*
812
813
* compute_minimal_stats() -- compute minimal column statistics
@@ -825,7 +826,7 @@ get_page_max_offset(Relation relation, BlockNumber blocknumber)
825
826
*/
826
827
static void
827
828
compute_minimal_stats (VacAttrStats * stats ,
828
- TupleDesc tupDesc , long totalrows ,
829
+ TupleDesc tupDesc , double totalrows ,
829
830
HeapTuple * rows , int numrows )
830
831
{
831
832
int i ;
@@ -1002,7 +1003,7 @@ compute_minimal_stats(VacAttrStats *stats,
1002
1003
1003
1004
if (f1 < 1 )
1004
1005
f1 = 1 ;
1005
- term1 = sqrt (( double ) totalrows / (double ) numrows ) * f1 ;
1006
+ term1 = sqrt (totalrows / (double ) numrows ) * f1 ;
1006
1007
stats -> stadistinct = floor (term1 + nmultiple + 0.5 );
1007
1008
}
1008
1009
@@ -1104,7 +1105,7 @@ compute_minimal_stats(VacAttrStats *stats,
1104
1105
*/
1105
1106
static void
1106
1107
compute_scalar_stats (VacAttrStats * stats ,
1107
- TupleDesc tupDesc , long totalrows ,
1108
+ TupleDesc tupDesc , double totalrows ,
1108
1109
HeapTuple * rows , int numrows )
1109
1110
{
1110
1111
int i ;
@@ -1298,7 +1299,7 @@ compute_scalar_stats(VacAttrStats *stats,
1298
1299
1299
1300
if (f1 < 1 )
1300
1301
f1 = 1 ;
1301
- term1 = sqrt (( double ) totalrows / (double ) numrows ) * f1 ;
1302
+ term1 = sqrt (totalrows / (double ) numrows ) * f1 ;
1302
1303
stats -> stadistinct = floor (term1 + nmultiple + 0.5 );
1303
1304
}
1304
1305
0 commit comments