61
61
/*
 * Address of the item's frequency: a double located (ndims) elements past
 * ITEM_NULLS(item, ndims).  There must be no whitespace between the macro
 * name and its parameter list, otherwise this is an object-like macro.
 */
#define ITEM_FREQUENCY(item, ndims) ((double *) (ITEM_NULLS(item, ndims) + (ndims)))
62
62
/*
 * Address of the item's base frequency: the double immediately following
 * the one addressed by ITEM_FREQUENCY(item, ndims).  There must be no
 * whitespace between the macro name and its parameter list, otherwise this
 * is an object-like macro.
 */
#define ITEM_BASE_FREQUENCY(item, ndims) ((double *) (ITEM_FREQUENCY(item, ndims) + 1))
63
63
64
+ /*
65
+ * Used to compute the size of the serialized MCV list representation.
66
+ */
67
+ #define MinSizeOfMCVList \
68
+ (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
69
+
70
+ #define SizeOfMCVList(ndims, nitems) \
71
+ (MAXALIGN(MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
72
+ MAXALIGN((ndims) * sizeof(DimensionInfo)) + \
73
+ MAXALIGN((nitems) * ITEM_SIZE(ndims)))
64
74
65
75
static MultiSortSupport build_mss (VacAttrStats * * stats , int numattrs );
66
76
@@ -491,7 +501,6 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
491
501
char * item = palloc0 (itemsize );
492
502
493
503
/* serialized items (indexes into arrays, etc.) */
494
- bytea * output ;
495
504
char * raw ;
496
505
char * ptr ;
497
506
@@ -625,27 +634,53 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
625
634
* Now we can finally compute how much space we'll actually need for the
626
635
* whole serialized MCV list (varlena header, MCV header, dimension info
627
636
* for each attribute, deduplicated values and items).
637
+ *
638
+ * The header fields are copied one by one, so that we don't need any
639
+ * explicit alignment (we copy them while deserializing). All fields
640
+ * after this need to be properly aligned, for direct access.
628
641
*/
629
- total_length = offsetof(MCVList , items )
630
- + MAXALIGN (ndims * sizeof (DimensionInfo ));
642
+ total_length = MAXALIGN (VARHDRSZ + (3 * sizeof (uint32 ))
643
+ + sizeof (AttrNumber ) + (ndims * sizeof (Oid )));
644
+
645
+ /* dimension info */
646
+ total_length += MAXALIGN (ndims * sizeof (DimensionInfo ));
631
647
632
648
/* add space for the arrays of deduplicated values */
633
649
for (i = 0 ; i < ndims ; i ++ )
634
650
total_length += MAXALIGN (info [i ].nbytes );
635
651
636
- /* and finally the items (no additional alignment needed) */
652
+ /*
653
+ * And finally the items (no additional alignment needed, we start
654
+ * at proper alignment and the itemsize formula uses MAXALIGN)
655
+ */
637
656
total_length += mcvlist -> nitems * itemsize ;
638
657
639
658
/*
640
659
* Allocate space for the whole serialized MCV list (we'll skip bytes,
641
660
* so we set them to zero to make the result more compressible).
642
661
*/
643
662
raw = palloc0 (total_length );
644
- ptr = raw ;
663
+ SET_VARSIZE (raw , total_length );
664
+ ptr = VARDATA (raw );
665
+
666
+ /* copy the MCV list header fields, one by one */
667
+ memcpy (ptr , & mcvlist -> magic , sizeof (uint32 ));
668
+ ptr += sizeof (uint32 );
669
+
670
+ memcpy (ptr , & mcvlist -> type , sizeof (uint32 ));
671
+ ptr += sizeof (uint32 );
672
+
673
+ memcpy (ptr , & mcvlist -> nitems , sizeof (uint32 ));
674
+ ptr += sizeof (uint32 );
675
+
676
+ memcpy (ptr , & mcvlist -> ndimensions , sizeof (AttrNumber ));
677
+ ptr += sizeof (AttrNumber );
645
678
646
- /* copy the MCV list header */
647
- memcpy (ptr , mcvlist , offsetof(MCVList , items ));
648
- ptr += offsetof(MCVList , items );
679
+ memcpy (ptr , mcvlist -> types , sizeof (Oid ) * ndims );
680
+ ptr += (sizeof (Oid ) * ndims );
681
+
682
+ /* the header may not be exactly aligned, so make sure it is */
683
+ ptr = raw + MAXALIGN (ptr - raw );
649
684
650
685
/* store information about the attributes */
651
686
memcpy (ptr , info , sizeof (DimensionInfo ) * ndims );
@@ -761,14 +796,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
761
796
pfree (values );
762
797
pfree (counts );
763
798
764
- output = (bytea * ) palloc (VARHDRSZ + total_length );
765
- SET_VARSIZE (output , VARHDRSZ + total_length );
766
-
767
- memcpy (VARDATA_ANY (output ), raw , total_length );
768
-
769
- pfree (raw );
770
-
771
- return output ;
799
+ return (bytea * ) raw ;
772
800
}
773
801
774
802
/*
@@ -789,8 +817,7 @@ statext_mcv_deserialize(bytea *data)
789
817
char * ptr ;
790
818
791
819
int ndims ,
792
- nitems ,
793
- itemsize ;
820
+ nitems ;
794
821
DimensionInfo * info = NULL ;
795
822
796
823
/* local allocation buffer (used only for deserialization) */
@@ -810,24 +837,32 @@ statext_mcv_deserialize(bytea *data)
810
837
811
838
/*
812
839
* We can't possibly deserialize a MCV list if there's not even a complete
813
- * header.
840
+ * header. We need an explicit formula here, because we serialize the
841
+ * header fields one by one, so we need to ignore struct alignment.
814
842
*/
815
- if (VARSIZE_ANY_EXHDR (data ) < offsetof( MCVList , items ) )
843
+ if (VARSIZE_ANY (data ) < MinSizeOfMCVList )
816
844
elog (ERROR , "invalid MCV size %zd (expected at least %zu)" ,
817
- VARSIZE_ANY_EXHDR (data ), offsetof( MCVList , items ) );
845
+ VARSIZE_ANY (data ), MinSizeOfMCVList );
818
846
819
847
/* read the MCV list header */
820
848
mcvlist = (MCVList * ) palloc0 (offsetof(MCVList , items ));
821
849
822
- /* initialize pointer to the data part (skip the varlena header) */
823
- raw = palloc (VARSIZE_ANY_EXHDR (data ));
824
- ptr = raw ;
825
-
826
- memcpy (raw , VARDATA_ANY (data ), VARSIZE_ANY_EXHDR (data ));
850
+ /* pointer to the data part (skip the varlena header) */
851
+ ptr = VARDATA_ANY (data );
852
+ raw = (char * ) data ;
827
853
828
854
/* get the header and perform further sanity checks */
829
- memcpy (mcvlist , ptr , offsetof(MCVList , items ));
830
- ptr += offsetof(MCVList , items );
855
+ memcpy (& mcvlist -> magic , ptr , sizeof (uint32 ));
856
+ ptr += sizeof (uint32 );
857
+
858
+ memcpy (& mcvlist -> type , ptr , sizeof (uint32 ));
859
+ ptr += sizeof (uint32 );
860
+
861
+ memcpy (& mcvlist -> nitems , ptr , sizeof (uint32 ));
862
+ ptr += sizeof (uint32 );
863
+
864
+ memcpy (& mcvlist -> ndimensions , ptr , sizeof (AttrNumber ));
865
+ ptr += sizeof (AttrNumber );
831
866
832
867
if (mcvlist -> magic != STATS_MCV_MAGIC )
833
868
elog (ERROR , "invalid MCV magic %u (expected %u)" ,
@@ -852,25 +887,29 @@ statext_mcv_deserialize(bytea *data)
852
887
853
888
nitems = mcvlist -> nitems ;
854
889
ndims = mcvlist -> ndimensions ;
855
- itemsize = ITEM_SIZE (ndims );
856
890
857
891
/*
858
892
* Check amount of data including DimensionInfo for all dimensions and
859
893
* also the serialized items (including uint16 indexes). Also, walk
860
894
* through the dimension information and add it to the sum.
861
895
*/
862
- expected_size = offsetof(MCVList , items ) +
863
- ndims * sizeof (DimensionInfo ) +
864
- (nitems * itemsize );
896
+ expected_size = SizeOfMCVList (ndims , nitems );
865
897
866
898
/*
867
899
* Check that we have at least the dimension and info records, along with
868
900
* the items. We don't know the size of the serialized values yet. We need
869
901
* to do this check first, before accessing the dimension info.
870
902
*/
871
- if (VARSIZE_ANY_EXHDR (data ) < expected_size )
903
+ if (VARSIZE_ANY (data ) < expected_size )
872
904
elog (ERROR , "invalid MCV size %zd (expected %zu)" ,
873
- VARSIZE_ANY_EXHDR (data ), expected_size );
905
+ VARSIZE_ANY (data ), expected_size );
906
+
907
+ /* Now copy the array of type Oids from the serialized data into the MCV list. */
908
+ memcpy (mcvlist -> types , ptr , sizeof (Oid ) * ndims );
909
+ ptr += (sizeof (Oid ) * ndims );
910
+
911
+ /* ensure alignment of the pointer (after the header fields) */
912
+ ptr = raw + MAXALIGN (ptr - raw );
874
913
875
914
/* Now it's safe to access the dimension info. */
876
915
info = (DimensionInfo * ) ptr ;
@@ -894,9 +933,9 @@ statext_mcv_deserialize(bytea *data)
894
933
* (header, dimension info, items and deduplicated data). So do the final
895
934
* check on size.
896
935
*/
897
- if (VARSIZE_ANY_EXHDR (data ) != expected_size )
936
+ if (VARSIZE_ANY (data ) != expected_size )
898
937
elog (ERROR , "invalid MCV size %zd (expected %zu)" ,
899
- VARSIZE_ANY_EXHDR (data ), expected_size );
938
+ VARSIZE_ANY (data ), expected_size );
900
939
901
940
/*
902
941
* We need an array of Datum values for each dimension, so that we can
@@ -1063,18 +1102,17 @@ statext_mcv_deserialize(bytea *data)
1063
1102
ptr += ITEM_SIZE (ndims );
1064
1103
1065
1104
/* check we're not overflowing the input */
1066
- Assert (ptr <= (char * ) raw + VARSIZE_ANY_EXHDR (data ));
1105
+ Assert (ptr <= (char * ) raw + VARSIZE_ANY (data ));
1067
1106
}
1068
1107
1069
1108
/* check that we processed all the data */
1070
- Assert (ptr == raw + VARSIZE_ANY_EXHDR (data ));
1109
+ Assert (ptr == raw + VARSIZE_ANY (data ));
1071
1110
1072
1111
/* release the buffers used for mapping */
1073
1112
for (dim = 0 ; dim < ndims ; dim ++ )
1074
1113
pfree (map [dim ]);
1075
1114
1076
1115
pfree (map );
1077
- pfree (raw );
1078
1116
1079
1117
return mcvlist ;
1080
1118
}
0 commit comments