@@ -153,19 +153,19 @@ jsonStatsRelease(JsonStats data)
153
153
}
154
154
155
155
/*
156
- * jsonPathStatsGetSpecialStats
157
- * Extract statistics of given type for JSON path.
158
- *
159
- * XXX This does not really extract any stats, it merely allocates the struct?
156
+ * jsonPathStatsAllocSpecialStats
157
+ * Allocate a copy of JsonPathStats for accessing special (length etc.)
158
+ * stats for a given JSON path.
160
159
*/
161
160
static JsonPathStats
162
- jsonPathStatsGetSpecialStats (JsonPathStats pstats , JsonPathStatsType type )
161
+ jsonPathStatsAllocSpecialStats (JsonPathStats pstats , JsonPathStatsType type )
163
162
{
164
163
JsonPathStats stats ;
165
164
166
165
if (!pstats )
167
166
return NULL ;
168
167
168
+ /* copy and replace stats type */
169
169
stats = palloc (sizeof (* stats ));
170
170
* stats = * pstats ;
171
171
stats -> type = type ;
@@ -174,35 +174,39 @@ jsonPathStatsGetSpecialStats(JsonPathStats pstats, JsonPathStatsType type)
174
174
}
175
175
176
176
/*
177
- * jsonPathStatsGetLengthStats
178
- * Extract statistics of lengths (for arrays or objects) for the path.
177
+ * jsonPathStatsGetArrayLengthStats
178
+ * Extract statistics of array lengths for the path (NULL if the array type frequency is not positive).
179
179
*/
180
180
JsonPathStats
181
- jsonPathStatsGetLengthStats (JsonPathStats pstats )
181
+ jsonPathStatsGetArrayLengthStats (JsonPathStats pstats )
182
182
{
183
183
/*
184
- * The length statistics is relevant only for values that are objects or
185
- * arrays. So if we observed no such values, we know there can't be such
184
+ * The array length statistics are relevant only for values that are arrays.
185
+ * So if we observed no such values, we know there can't be such
186
186
* statistics and so we simply return NULL.
187
187
*/
188
- if (jsonPathStatsGetTypeFreq (pstats , jbvObject , 0.0 ) <= 0.0 &&
189
- jsonPathStatsGetTypeFreq (pstats , jbvArray , 0.0 ) <= 0.0 )
188
+ if (jsonPathStatsGetTypeFreq (pstats , jbvArray , 0.0 ) <= 0.0 )
190
189
return NULL ;
191
190
192
- return jsonPathStatsGetSpecialStats (pstats , JsonPathStatsLength );
191
+ return jsonPathStatsAllocSpecialStats (pstats , JsonPathStatsArrayLength );
193
192
}
194
193
195
194
/*
196
- * jsonPathStatsGetArrayLengthStats
197
- * Extract statistics of lengths for arrays.
198
- *
199
- * XXX Why doesn't this do jsonPathStatsGetTypeFreq check similar to what
200
- * jsonPathStatsGetLengthStats does?
195
+ * jsonPathStatsGetObjectLengthStats
196
+ * Extract statistics of object lengths for the path (NULL if the object type frequency is not positive).
201
197
*/
202
- static JsonPathStats
203
- jsonPathStatsGetArrayLengthStats (JsonPathStats pstats )
198
+ JsonPathStats
199
+ jsonPathStatsGetObjectLengthStats (JsonPathStats pstats )
204
200
{
205
- return jsonPathStatsGetSpecialStats (pstats , JsonPathStatsArrayLength );
201
+ /*
202
+ * The object length statistics are relevant only for values that are objects.
203
+ * So if we observed no such values, we know there can't be such
204
+ * statistics and so we simply return NULL.
205
+ */
206
+ if (jsonPathStatsGetTypeFreq (pstats , jbvObject , 0.0 ) <= 0.0 )
207
+ return NULL ;
208
+
209
+ return jsonPathStatsAllocSpecialStats (pstats , JsonPathStatsObjectLength );
206
210
}
207
211
208
212
/*
@@ -474,15 +478,20 @@ jsonStatsGetPath(JsonStats jsdata, Datum *path, int pathlen, float4 *nullfrac)
474
478
else
475
479
{
476
480
/* Find array index stats */
477
- float4 arrfreq ;
478
-
479
481
/* FIXME consider object key "index" also */
480
- pstats = jsonPathStatsGetSubpath (pstats , NULL );
481
- sel *= jsonPathStatsGetArrayIndexSelectivity (pstats , index );
482
- arrfreq = jsonPathStatsGetFreq (pstats , 0.0 );
482
+ JsonPathStats arrstats = jsonPathStatsGetSubpath (pstats , NULL );
483
+
484
+ if (arrstats )
485
+ {
486
+ float4 arrfreq = jsonPathStatsGetFreq (pstats , 0.0 );
483
487
484
- if (arrfreq > 0.0 )
485
- sel /= arrfreq ;
488
+ sel *= jsonPathStatsGetArrayIndexSelectivity (pstats , index );
489
+
490
+ if (arrfreq > 0.0 )
491
+ sel /= arrfreq ;
492
+ }
493
+
494
+ pstats = arrstats ;
486
495
}
487
496
488
497
pfree (key );
@@ -702,7 +711,8 @@ jsonPathStatsExtractData(JsonPathStats pstats, JsonStatType stattype,
702
711
case JsonStatJsonb :
703
712
case JsonStatJsonbWithoutSubpaths :
704
713
key = pstats -> type == JsonPathStatsArrayLength ? "array_length" :
705
- pstats -> type == JsonPathStatsLength ? "length" : "json" ;
714
+ pstats -> type == JsonPathStatsObjectLength ? "object_length" :
715
+ "json" ;
706
716
type = JSONBOID ;
707
717
eqop = JsonbEqOperator ;
708
718
ltop = JsonbLtOperator ;
@@ -846,30 +856,24 @@ jsonPathStatsGetTypeFreq(JsonPathStats pstats, JsonbValueType type,
846
856
/*
847
857
* When dealing with (object/array) length stats, we only really care about
848
858
* objects and arrays.
859
+ *
860
+ * Lengths are always numeric, so simply return 0 if the frequency of
861
+ * non-numeric values is requested.
849
862
*/
850
- if (pstats -> type == JsonPathStatsLength )
863
+ if (pstats -> type == JsonPathStatsArrayLength )
851
864
{
852
- /*
853
- * Array/object length is always numeric, so simply return 0 if
854
- * requested non-numeric frequency.
855
- */
856
865
if (type != jbvNumeric )
857
866
return 0.0 ;
858
867
859
- return jsonPathStatsGetFloat (pstats , "freq_array" , defaultfreq ) +
860
- jsonPathStatsGetFloat (pstats , "freq_object" , defaultfreq );
868
+ return jsonPathStatsGetFloat (pstats , "freq_array" , defaultfreq );
861
869
}
862
870
863
- if (pstats -> type == JsonPathStatsArrayLength )
871
+ if (pstats -> type == JsonPathStatsObjectLength )
864
872
{
865
- /*
866
- * Array length is always numeric, so simply return 0 if requested
867
- * non-numeric frequency.
868
- */
869
873
if (type != jbvNumeric )
870
874
return 0.0 ;
871
875
872
- return jsonPathStatsGetFreq (pstats , defaultfreq );
876
+ return jsonPathStatsGetFloat (pstats , "freq_object" , defaultfreq );
873
877
}
874
878
875
879
/* Which JSON type are we interested in? Pick the right freq_type key. */
@@ -955,29 +959,33 @@ static HeapTuple
955
959
jsonStatsGetArrayIndexStatsTuple (JsonStats jsdata , JsonStatType type , int32 index )
956
960
{
957
961
/* Extract statistics for root array elements */
958
- JsonPathStats pstats = jsonStatsGetRootArrayPath (jsdata );
962
+ JsonPathStats arrstats = jsonStatsGetRootArrayPath (jsdata );
963
+ JsonPathStats rootstats ;
959
964
Selectivity index_sel ;
960
965
961
- if (!pstats )
966
+ if (!arrstats )
962
967
return NULL ;
963
968
964
969
/* Compute relative selectivity of 'EXISTS($[index])' */
965
- index_sel = jsonPathStatsGetArrayIndexSelectivity (pstats , index );
966
- index_sel /= jsonPathStatsGetFreq (pstats , 0.0 );
970
+ rootstats = jsonStatsGetRootPath (jsdata );
971
+ index_sel = jsonPathStatsGetArrayIndexSelectivity (rootstats , index );
972
+ index_sel /= jsonPathStatsGetFreq (arrstats , 0.0 );
967
973
968
974
/* Form pg_statistics tuple, taking into account array index selectivity */
969
- return jsonPathStatsFormTuple (pstats , type , 1.0 - index_sel );
975
+ return jsonPathStatsFormTuple (arrstats , type , 1.0 - index_sel );
970
976
}
971
977
972
978
/*
973
979
* jsonStatsGetPathFreq
974
980
* Return frequency of a path (fraction of documents containing it).
975
981
*/
976
982
static float4
977
- jsonStatsGetPathFreq (JsonStats jsdata , Datum * path , int pathlen )
983
+ jsonStatsGetPathFreq (JsonStats jsdata , Datum * path , int pathlen ,
984
+ bool try_array_indexes )
978
985
{
979
986
float4 nullfrac ;
980
- JsonPathStats pstats = jsonStatsGetPath (jsdata , path , pathlen , & nullfrac );
987
+ JsonPathStats pstats = jsonStatsGetPath (jsdata , path , pathlen ,
988
+ try_array_indexes , & nullfrac );
981
989
float4 freq = (1.0 - nullfrac ) * jsonPathStatsGetFreq (pstats , 0.0 );
982
990
983
991
CLAMP_PROBABILITY (freq );
@@ -1192,14 +1200,14 @@ static void
1192
1200
jsonAccumulateSubPathSelectivity (Selectivity subpath_abs_sel ,
1193
1201
Selectivity path_freq ,
1194
1202
Selectivity * path_relative_sel ,
1195
- bool is_array_accessor ,
1196
- JsonPathStats path_stats )
1203
+ JsonPathStats array_path_stats )
1197
1204
{
1198
1205
Selectivity sel = subpath_abs_sel / path_freq ; /* relative selectivity */
1199
1206
1200
1207
/* XXX Try to take into account array length */
1201
- if (is_array_accessor )
1202
- sel = 1.0 - pow (1.0 - sel , jsonPathStatsGetAvgArraySize (path_stats ));
1208
+ if (array_path_stats )
1209
+ sel = 1.0 - pow (1.0 - sel ,
1210
+ jsonPathStatsGetAvgArraySize (array_path_stats ));
1203
1211
1204
1212
/* Accumulate selectivity of subpath into parent path */
1205
1213
* path_relative_sel *= sel ;
@@ -1299,6 +1307,14 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
1299
1307
JsonPathStats pstats ;
1300
1308
Selectivity freq ;
1301
1309
1310
+ /*
1311
+ * First, find stats for the parent path if needed, it will be
1312
+ * used in jsonAccumulateSubPathSelectivity().
1313
+ */
1314
+ if (!path -> stats )
1315
+ path -> stats = jsonStatsFindPath (stats , pathstr .data ,
1316
+ pathstr .len );
1317
+
1302
1318
/* Append path string entry for array elements, get stats. */
1303
1319
jsonPathAppendEntry (& pathstr , NULL );
1304
1320
pstats = jsonStatsFindPath (stats , pathstr .data , pathstr .len );
@@ -1336,8 +1352,8 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
1336
1352
/* Accumulate selectivity into parent path */
1337
1353
jsonAccumulateSubPathSelectivity (abs_sel , path -> freq ,
1338
1354
& path -> sel ,
1339
- path -> is_array_accesor ,
1340
- path -> stats );
1355
+ path -> is_array_accesor ?
1356
+ path -> parent -> stats : NULL );
1341
1357
break ;
1342
1358
}
1343
1359
@@ -1358,22 +1374,28 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
1358
1374
case WJB_ELEM :
1359
1375
{
1360
1376
/*
1361
- * Extract statistics for path. Arrays elements shares the
1377
+ * Extract statistics for a path. Array elements share the
1362
1378
* same statistics that was extracted in WJB_BEGIN_ARRAY.
1363
1379
*/
1364
1380
JsonPathStats pstats = r == WJB_ELEM ? path -> stats :
1365
1381
jsonStatsFindPath (stats , pathstr .data , pathstr .len );
1366
- /* Make scalar jsonb datum */
1367
- Datum scalar = JsonbPGetDatum (JsonbValueToJsonb (& v ));
1368
- /* Absolute selectivity of 'path == scalar' */
1369
- Selectivity abs_sel = jsonSelectivity (pstats , scalar ,
1370
- JsonbEqOperator );
1382
+ Selectivity abs_sel ; /* Absolute selectivity of 'path == scalar' */
1383
+
1384
+ if (pstats )
1385
+ {
1386
+ /* Make scalar jsonb datum and compute selectivity */
1387
+ Datum scalar = JsonbPGetDatum (JsonbValueToJsonb (& v ));
1388
+
1389
+ abs_sel = jsonSelectivity (pstats , scalar , JsonbEqOperator );
1390
+ }
1391
+ else
1392
+ abs_sel = 0.0 ;
1371
1393
1372
1394
/* Accumulate selectivity into parent path */
1373
1395
jsonAccumulateSubPathSelectivity (abs_sel , path -> freq ,
1374
1396
& path -> sel ,
1375
- path -> is_array_accesor ,
1376
- path -> stats );
1397
+ path -> is_array_accesor ?
1398
+ path -> parent -> stats : NULL );
1377
1399
break ;
1378
1400
}
1379
1401
@@ -1417,7 +1439,7 @@ jsonSelectivityExists(JsonStats stats, Datum key)
1417
1439
arrstats = jsonStatsGetRootArrayPath (stats );
1418
1440
arraysel = jsonSelectivity (arrstats , jbkey , JsonbEqOperator );
1419
1441
arraysel = 1.0 - pow (1.0 - arraysel ,
1420
- jsonPathStatsGetAvgArraySize (arrstats ));
1442
+ jsonPathStatsGetAvgArraySize (rootstats ));
1421
1443
1422
1444
sel = keysel + scalarsel + arraysel ;
1423
1445
CLAMP_PROBABILITY (sel );
0 commit comments