@@ -1074,45 +1074,83 @@ jsonSelectivity(JsonPathStats stats, Datum scalar, Oid operator)
1074
1074
return sel ;
1075
1075
}
1076
1076
1077
+ /*
1078
+ * jsonAccumulateSubPathSelectivity
1079
+ * Transform absolute subpath selectivity into relative and accumulate it
1080
+ * into parent path simply by multiplication of relative selectivities.
1081
+ */
1082
+ static void
1083
+ jsonAccumulateSubPathSelectivity (Selectivity subpath_abs_sel ,
1084
+ Selectivity path_freq ,
1085
+ Selectivity * path_relative_sel ,
1086
+ StringInfo pathstr ,
1087
+ JsonPathStats path_stats )
1088
+ {
1089
+ Selectivity sel = subpath_abs_sel / path_freq ; /* relative selectivity */
1090
+
1091
+ /* XXX Try to take into account array length */
1092
+ if (pathstr -> data [pathstr -> len - 1 ] == '#' )
1093
+ sel = 1.0 - pow (1.0 - sel , jsonPathStatsGetAvgArraySize (path_stats ));
1094
+
1095
+ /* Accumulate selectivity of subpath into parent path */
1096
+ * path_relative_sel *= sel ;
1097
+ }
1098
+
1077
1099
/*
1078
1100
* jsonSelectivityContains
1079
1101
* Estimate selectivity for containment operator on JSON.
1080
1102
*
1081
- * XXX This really needs more comments explaining the logic.
1103
+ * Iterate through query jsonb elements, build paths to its leaf elements,
1104
+ * calculate selectivities of 'path == scalar' in leaves, multiply relative
1105
+ * selectivities of subpaths at each path level, propagate computed
1106
+ * selectivities to the root.
1082
1107
*/
1083
1108
static Selectivity
1084
1109
jsonSelectivityContains (JsonStats stats , Jsonb * jb )
1085
1110
{
1086
1111
JsonbValue v ;
1087
1112
JsonbIterator * it ;
1088
1113
JsonbIteratorToken r ;
1089
- StringInfoData pathstr ;
1090
- struct Path
1114
+ StringInfoData pathstr ; /* path string */
1115
+ struct Path /* path stack entry */
1091
1116
{
1092
- struct Path * parent ;
1093
- int len ;
1094
- JsonPathStats stats ;
1095
- Selectivity freq ;
1096
- Selectivity sel ;
1097
- } root ,
1098
- * path = & root ;
1099
- Selectivity scalarSel = 0.0 ;
1100
- Selectivity sel ;
1101
- bool rawScalar = false;
1102
-
1117
+ struct Path * parent ; /* parent entry */
1118
+ int len ; /* associated length of pathstr */
1119
+ Selectivity freq ; /* absolute frequency of path */
1120
+ Selectivity sel ; /* relative selectivity of subpaths */
1121
+ JsonPathStats stats ; /* statistics for the path */
1122
+ } root , /* root path entry */
1123
+ * path = & root ; /* path entry stack */
1124
+ Selectivity sel ; /* resulting selectivity */
1125
+ Selectivity scalarSel ; /* selectivity of 'jsonb == scalar' */
1126
+
1127
+ /* Initialize root path string */
1103
1128
initStringInfo (& pathstr );
1104
-
1105
1129
appendStringInfo (& pathstr , "$" );
1106
1130
1131
+ /* Initialize root path entry */
1107
1132
root .parent = NULL ;
1108
1133
root .len = pathstr .len ;
1109
1134
root .stats = jsonStatsGetPathStatsStr (stats , pathstr .data , pathstr .len );
1110
1135
root .freq = jsonPathStatsGetFreq (root .stats , 0.0 );
1111
1136
root .sel = 1.0 ;
1112
1137
1138
+ /* Return 0, if NULL fraction is 1. */
1113
1139
if (root .freq <= 0.0 )
1114
1140
return 0.0 ;
1115
1141
1142
+ /*
1143
+ * Selectivity of query 'jsonb @> scalar' consists of selectivities of
1144
+ * 'jsonb == scalar' and 'jsonb[*] == scalar'. Selectivity of
1145
+ * 'jsonb[*] == scalar' will be computed in root.sel, but for
1146
+ * 'jsonb == scalar' we need additional computation.
1147
+ */
1148
+ if (JsonContainerIsScalar (& jb -> root ))
1149
+ scalarSel = jsonSelectivity (root .stats , JsonbPGetDatum (jb ),
1150
+ JsonbEqOperator );
1151
+ else
1152
+ scalarSel = 0.0 ;
1153
+
1116
1154
it = JsonbIteratorInit (& jb -> root );
1117
1155
1118
1156
while ((r = JsonbIteratorNext (& it , & v , false)) != WJB_DONE )
@@ -1121,58 +1159,82 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
1121
1159
{
1122
1160
case WJB_BEGIN_OBJECT :
1123
1161
{
1124
- struct Path * p = palloc (sizeof (* p ));
1162
+ struct Path * p ;
1163
+ Selectivity freq =
1164
+ jsonPathStatsGetTypeFreq (path -> stats , jbvObject , 0.0 );
1165
+
1166
+ /* If there are no objects, selectivity is 0. */
1167
+ if (freq <= 0.0 )
1168
+ return 0.0 ;
1125
1169
1170
+ /*
1171
+ * Push path entry for object keys, actual key names are
1172
+ * appended later in WJB_KEY case.
1173
+ */
1174
+ p = palloc (sizeof (* p ));
1126
1175
p -> len = pathstr .len ;
1127
1176
p -> parent = path ;
1128
1177
p -> stats = NULL ;
1129
- p -> freq = jsonPathStatsGetTypeFreq (path -> stats , jbvObject , 0.0 );
1130
- if (p -> freq <= 0.0 )
1131
- return 0.0 ;
1178
+ p -> freq = freq ;
1132
1179
p -> sel = 1.0 ;
1133
1180
path = p ;
1134
1181
break ;
1135
1182
}
1136
1183
1137
1184
case WJB_BEGIN_ARRAY :
1138
1185
{
1139
- struct Path * p = palloc ( sizeof ( * p )) ;
1140
-
1141
- rawScalar = v . val . array . rawScalar ;
1186
+ struct Path * p ;
1187
+ JsonPathStats pstats ;
1188
+ Selectivity freq ;
1142
1189
1190
+ /* Append path string entry for array elements, get stats. */
1143
1191
appendStringInfo (& pathstr , ".#" );
1192
+ pstats = jsonStatsGetPathStatsStr (stats , pathstr .data ,
1193
+ pathstr .len );
1194
+ freq = jsonPathStatsGetFreq (pstats , 0.0 );
1195
+
1196
+ /* If there are no arrays, return 0 or scalar selectivity */
1197
+ if (freq <= 0.0 )
1198
+ return scalarSel ;
1199
+
1200
+ /* Push path entry for array elements. */
1201
+ p = palloc (sizeof (* p ));
1144
1202
p -> len = pathstr .len ;
1145
1203
p -> parent = path ;
1146
- p -> stats = jsonStatsGetPathStatsStr (stats , pathstr .data ,
1147
- pathstr .len );
1148
- p -> freq = jsonPathStatsGetFreq (p -> stats , 0.0 );
1149
- if (p -> freq <= 0.0 && !rawScalar )
1150
- return 0.0 ;
1204
+ p -> stats = pstats ;
1205
+ p -> freq = freq ;
1151
1206
p -> sel = 1.0 ;
1152
1207
path = p ;
1153
-
1154
1208
break ;
1155
1209
}
1156
1210
1157
1211
case WJB_END_OBJECT :
1158
1212
case WJB_END_ARRAY :
1159
1213
{
1160
1214
struct Path * p = path ;
1215
+ /* Absolute selectivity of the path with all its subpaths */
1216
+ Selectivity abs_sel = p -> sel * p -> freq ;
1161
1217
1218
+ /* Pop last path entry */
1162
1219
path = path -> parent ;
1163
- sel = p -> sel * p -> freq / path -> freq ;
1164
1220
pfree (p );
1165
- pathstr .data [pathstr .len = path -> len ] = '\0' ;
1166
- if (pathstr .data [pathstr .len - 1 ] == '#' )
1167
- sel = 1.0 - pow (1.0 - sel ,
1168
- jsonPathStatsGetAvgArraySize (path -> stats ));
1169
- path -> sel *= sel ;
1221
+ pathstr .len = path -> len ;
1222
+ pathstr .data [pathstr .len ] = '\0' ;
1223
+
1224
+ /* Accumulate selectivity into parent path */
1225
+ jsonAccumulateSubPathSelectivity (abs_sel , path -> freq ,
1226
+ & path -> sel , & pathstr ,
1227
+ path -> stats );
1170
1228
break ;
1171
1229
}
1172
1230
1173
1231
case WJB_KEY :
1174
1232
{
1175
- pathstr .data [pathstr .len = path -> parent -> len ] = '\0' ;
1233
+ /* Remove previous key in the path string */
1234
+ pathstr .len = path -> parent -> len ;
1235
+ pathstr .data [pathstr .len ] = '\0' ;
1236
+
1237
+ /* Append current key to path string */
1176
1238
jsonPathAppendEntryWithLen (& pathstr , v .val .string .val ,
1177
1239
v .val .string .len );
1178
1240
path -> len = pathstr .len ;
@@ -1182,26 +1244,22 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
1182
1244
case WJB_VALUE :
1183
1245
case WJB_ELEM :
1184
1246
{
1185
- JsonPathStats pstats = r == WJB_ELEM ? path -> stats :
1247
+ /*
1248
+ * Extract statistics for path. Array elements share the
1249
+ * same statistics that were extracted in WJB_BEGIN_ARRAY.
1250
+ */
1251
+ JsonPathStats pstats = r == WJB_ELEM ? path -> stats :
1186
1252
jsonStatsGetPathStatsStr (stats , pathstr .data , pathstr .len );
1187
- Datum scalar = JsonbPGetDatum (JsonbValueToJsonb (& v ));
1188
-
1189
- if (path -> freq <= 0.0 )
1190
- sel = 0.0 ;
1191
- else
1192
- {
1193
- sel = jsonSelectivity (pstats , scalar , JsonbEqOperator );
1194
- sel /= path -> freq ;
1195
- if (pathstr .data [pathstr .len - 1 ] == '#' )
1196
- sel = 1.0 - pow (1.0 - sel ,
1197
- jsonPathStatsGetAvgArraySize (path -> stats ));
1198
- }
1199
-
1200
- path -> sel *= sel ;
1201
-
1202
- if (r == WJB_ELEM && path -> parent == & root && rawScalar )
1203
- scalarSel = jsonSelectivity (root .stats , scalar ,
1204
- JsonbEqOperator );
1253
+ /* Make scalar jsonb datum */
1254
+ Datum scalar = JsonbPGetDatum (JsonbValueToJsonb (& v ));
1255
+ /* Absolute selectivity of 'path == scalar' */
1256
+ Selectivity abs_sel = jsonSelectivity (pstats , scalar ,
1257
+ JsonbEqOperator );
1258
+
1259
+ /* Accumulate selectivity into parent path */
1260
+ jsonAccumulateSubPathSelectivity (abs_sel , path -> freq ,
1261
+ & path -> sel , & pathstr ,
1262
+ path -> stats );
1205
1263
break ;
1206
1264
}
1207
1265
@@ -1210,7 +1268,8 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
1210
1268
}
1211
1269
}
1212
1270
1213
- sel = scalarSel + root .sel * root .freq ;
1271
+ /* Compute absolute selectivity for root, including raw scalar case. */
1272
+ sel = root .sel * root .freq + scalarSel ;
1214
1273
CLAMP_PROBABILITY (sel );
1215
1274
return sel ;
1216
1275
}
0 commit comments