Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 51dd721

Browse files
author
Nikita Glukhov
committed
Refactor jsonSelectivityContains()
1 parent b136cac commit 51dd721

File tree

1 file changed

+114
-55
lines changed

1 file changed

+114
-55
lines changed

src/backend/utils/adt/jsonb_selfuncs.c

Lines changed: 114 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,45 +1074,83 @@ jsonSelectivity(JsonPathStats stats, Datum scalar, Oid operator)
10741074
return sel;
10751075
}
10761076

1077+
/*
1078+
* jsonAccumulateSubPathSelectivity
1079+
* Transform absolute subpath selectivity into relative and accumulate it
1080+
* into parent path simply by multiplication of relative selectivities.
1081+
*/
1082+
static void
1083+
jsonAccumulateSubPathSelectivity(Selectivity subpath_abs_sel,
1084+
Selectivity path_freq,
1085+
Selectivity *path_relative_sel,
1086+
StringInfo pathstr,
1087+
JsonPathStats path_stats)
1088+
{
1089+
Selectivity sel = subpath_abs_sel / path_freq; /* relative selectivity */
1090+
1091+
/* XXX Try to take into account array length */
1092+
if (pathstr->data[pathstr->len - 1] == '#')
1093+
sel = 1.0 - pow(1.0 - sel, jsonPathStatsGetAvgArraySize(path_stats));
1094+
1095+
/* Accumulate selectivity of subpath into parent path */
1096+
*path_relative_sel *= sel;
1097+
}
1098+
10771099
/*
10781100
* jsonSelectivityContains
10791101
* Estimate selectivity for containment operator on JSON.
10801102
*
1081-
* XXX This really needs more comments explaining the logic.
1103+
* Iterate through query jsonb elements, build paths to its leaf elements,
1104+
* calculate selectivities of 'path == scalar' in leaves, multiply relative
1105+
* selectivities of subpaths at each path level, propagate computed
1106+
* selectivities to the root.
10821107
*/
10831108
static Selectivity
10841109
jsonSelectivityContains(JsonStats stats, Jsonb *jb)
10851110
{
10861111
JsonbValue v;
10871112
JsonbIterator *it;
10881113
JsonbIteratorToken r;
1089-
StringInfoData pathstr;
1090-
struct Path
1114+
StringInfoData pathstr; /* path string */
1115+
struct Path /* path stack entry */
10911116
{
1092-
struct Path *parent;
1093-
int len;
1094-
JsonPathStats stats;
1095-
Selectivity freq;
1096-
Selectivity sel;
1097-
} root,
1098-
*path = &root;
1099-
Selectivity scalarSel = 0.0;
1100-
Selectivity sel;
1101-
bool rawScalar = false;
1102-
1117+
struct Path *parent; /* parent entry */
1118+
int len; /* associated length of pathstr */
1119+
Selectivity freq; /* absolute frequence of path */
1120+
Selectivity sel; /* relative selectivity of subpaths */
1121+
JsonPathStats stats; /* statistics for the path */
1122+
} root, /* root path entry */
1123+
*path = &root; /* path entry stack */
1124+
Selectivity sel; /* resulting selectivity */
1125+
Selectivity scalarSel; /* selectivity of 'jsonb == scalar' */
1126+
1127+
/* Initialize root path string */
11031128
initStringInfo(&pathstr);
1104-
11051129
appendStringInfo(&pathstr, "$");
11061130

1131+
/* Initialize root path entry */
11071132
root.parent = NULL;
11081133
root.len = pathstr.len;
11091134
root.stats = jsonStatsGetPathStatsStr(stats, pathstr.data, pathstr.len);
11101135
root.freq = jsonPathStatsGetFreq(root.stats, 0.0);
11111136
root.sel = 1.0;
11121137

1138+
/* Return 0, if NULL fraction is 1. */
11131139
if (root.freq <= 0.0)
11141140
return 0.0;
11151141

1142+
/*
1143+
* Selectivity of query 'jsonb @> scalar' consists of selectivities of
1144+
* 'jsonb == scalar' and 'jsonb[*] == scalar'. Selectivity of
1145+
* 'jsonb[*] == scalar' will be computed in root.sel, but for
1146+
* 'jsonb == scalar' we need additional computation.
1147+
*/
1148+
if (JsonContainerIsScalar(&jb->root))
1149+
scalarSel = jsonSelectivity(root.stats, JsonbPGetDatum(jb),
1150+
JsonbEqOperator);
1151+
else
1152+
scalarSel = 0.0;
1153+
11161154
it = JsonbIteratorInit(&jb->root);
11171155

11181156
while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
@@ -1121,58 +1159,82 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
11211159
{
11221160
case WJB_BEGIN_OBJECT:
11231161
{
1124-
struct Path *p = palloc(sizeof(*p));
1162+
struct Path *p;
1163+
Selectivity freq =
1164+
jsonPathStatsGetTypeFreq(path->stats, jbvObject, 0.0);
1165+
1166+
/* If there are no objects, selectivity is 0. */
1167+
if (freq <= 0.0)
1168+
return 0.0;
11251169

1170+
/*
1171+
* Push path entry for object keys, actual key names are
1172+
* appended later in WJB_KEY case.
1173+
*/
1174+
p = palloc(sizeof(*p));
11261175
p->len = pathstr.len;
11271176
p->parent = path;
11281177
p->stats = NULL;
1129-
p->freq = jsonPathStatsGetTypeFreq(path->stats, jbvObject, 0.0);
1130-
if (p->freq <= 0.0)
1131-
return 0.0;
1178+
p->freq = freq;
11321179
p->sel = 1.0;
11331180
path = p;
11341181
break;
11351182
}
11361183

11371184
case WJB_BEGIN_ARRAY:
11381185
{
1139-
struct Path *p = palloc(sizeof(*p));
1140-
1141-
rawScalar = v.val.array.rawScalar;
1186+
struct Path *p;
1187+
JsonPathStats pstats;
1188+
Selectivity freq;
11421189

1190+
/* Append path string entry for array elements, get stats. */
11431191
appendStringInfo(&pathstr, ".#");
1192+
pstats = jsonStatsGetPathStatsStr(stats, pathstr.data,
1193+
pathstr.len);
1194+
freq = jsonPathStatsGetFreq(pstats, 0.0);
1195+
1196+
/* If there are no arrays, return 0 or scalar selectivity */
1197+
if (freq <= 0.0)
1198+
return scalarSel;
1199+
1200+
/* Push path entry for array elements. */
1201+
p = palloc(sizeof(*p));
11441202
p->len = pathstr.len;
11451203
p->parent = path;
1146-
p->stats = jsonStatsGetPathStatsStr(stats, pathstr.data,
1147-
pathstr.len);
1148-
p->freq = jsonPathStatsGetFreq(p->stats, 0.0);
1149-
if (p->freq <= 0.0 && !rawScalar)
1150-
return 0.0;
1204+
p->stats = pstats;
1205+
p->freq = freq;
11511206
p->sel = 1.0;
11521207
path = p;
1153-
11541208
break;
11551209
}
11561210

11571211
case WJB_END_OBJECT:
11581212
case WJB_END_ARRAY:
11591213
{
11601214
struct Path *p = path;
1215+
/* Absolute selectivity of the path with all its subpaths */
1216+
Selectivity abs_sel = p->sel * p->freq;
11611217

1218+
/* Pop last path entry */
11621219
path = path->parent;
1163-
sel = p->sel * p->freq / path->freq;
11641220
pfree(p);
1165-
pathstr.data[pathstr.len = path->len] = '\0';
1166-
if (pathstr.data[pathstr.len - 1] == '#')
1167-
sel = 1.0 - pow(1.0 - sel,
1168-
jsonPathStatsGetAvgArraySize(path->stats));
1169-
path->sel *= sel;
1221+
pathstr.len = path->len;
1222+
pathstr.data[pathstr.len] = '\0';
1223+
1224+
/* Accumulate selectivity into parent path */
1225+
jsonAccumulateSubPathSelectivity(abs_sel, path->freq,
1226+
&path->sel, &pathstr,
1227+
path->stats);
11701228
break;
11711229
}
11721230

11731231
case WJB_KEY:
11741232
{
1175-
pathstr.data[pathstr.len = path->parent->len] = '\0';
1233+
/* Remove previous key in the path string */
1234+
pathstr.len = path->parent->len;
1235+
pathstr.data[pathstr.len] = '\0';
1236+
1237+
/* Append current key to path string */
11761238
jsonPathAppendEntryWithLen(&pathstr, v.val.string.val,
11771239
v.val.string.len);
11781240
path->len = pathstr.len;
@@ -1182,26 +1244,22 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
11821244
case WJB_VALUE:
11831245
case WJB_ELEM:
11841246
{
1185-
JsonPathStats pstats = r == WJB_ELEM ? path->stats :
1247+
/*
1248+
* Extract statistics for path. Array elements share the
1249+
* same statistics that were extracted in WJB_BEGIN_ARRAY.
1250+
*/
1251+
JsonPathStats pstats = r == WJB_ELEM ? path->stats :
11861252
jsonStatsGetPathStatsStr(stats, pathstr.data, pathstr.len);
1187-
Datum scalar = JsonbPGetDatum(JsonbValueToJsonb(&v));
1188-
1189-
if (path->freq <= 0.0)
1190-
sel = 0.0;
1191-
else
1192-
{
1193-
sel = jsonSelectivity(pstats, scalar, JsonbEqOperator);
1194-
sel /= path->freq;
1195-
if (pathstr.data[pathstr.len - 1] == '#')
1196-
sel = 1.0 - pow(1.0 - sel,
1197-
jsonPathStatsGetAvgArraySize(path->stats));
1198-
}
1199-
1200-
path->sel *= sel;
1201-
1202-
if (r == WJB_ELEM && path->parent == &root && rawScalar)
1203-
scalarSel = jsonSelectivity(root.stats, scalar,
1204-
JsonbEqOperator);
1253+
/* Make scalar jsonb datum */
1254+
Datum scalar = JsonbPGetDatum(JsonbValueToJsonb(&v));
1255+
/* Absolute selectivity of 'path == scalar' */
1256+
Selectivity abs_sel = jsonSelectivity(pstats, scalar,
1257+
JsonbEqOperator);
1258+
1259+
/* Accumulate selectivity into parent path */
1260+
jsonAccumulateSubPathSelectivity(abs_sel, path->freq,
1261+
&path->sel, &pathstr,
1262+
path->stats);
12051263
break;
12061264
}
12071265

@@ -1210,7 +1268,8 @@ jsonSelectivityContains(JsonStats stats, Jsonb *jb)
12101268
}
12111269
}
12121270

1213-
sel = scalarSel + root.sel * root.freq;
1271+
/* Compute absolute selectivity for root, including raw scalar case. */
1272+
sel = root.sel * root.freq + scalarSel;
12141273
CLAMP_PROBABILITY(sel);
12151274
return sel;
12161275
}

0 commit comments

Comments
 (0)