Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b204148

Browse files
author
Nikita Glukhov
committed
Use bitmaps to fetch only the rows containing desired path
1 parent f56de33 commit b204148

File tree

1 file changed

+175
-10
lines changed

1 file changed

+175
-10
lines changed

src/backend/utils/adt/jsonb_typanalyze.c

Lines changed: 175 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,25 @@ typedef struct JsonValueStats
153153
* (for avg. array length) */
154154
} JsonValueStats;
155155

156+
typedef struct JsonPathDocBitmap
157+
{
158+
bool is_list;
159+
int size;
160+
int allocated;
161+
union
162+
{
163+
int32 *list;
164+
uint8 *bitmap;
165+
} data;
166+
} JsonPathDocBitmap;
167+
168+
/* JSON path and list of documents containing it */
169+
typedef struct JsonPathAnlDocs
170+
{
171+
JsonPathEntry path;
172+
JsonPathDocBitmap bitmap;
173+
} JsonPathAnlDocs;
174+
156175
/* Main structure for analyzed JSON path */
157176
typedef struct JsonPathAnlStats
158177
{
@@ -181,6 +200,7 @@ typedef struct JsonAnalyzeContext
181200
double totalrows;
182201
double total_width;
183202
int samplerows;
203+
int current_rownum;
184204
int target;
185205
int null_cnt;
186206
int analyzed_cnt;
@@ -227,6 +247,131 @@ JsonPathEntryHash(const void *key, Size keysize)
227247
return hash;
228248
}
229249

250+
static void
251+
jsonStatsBitmapInit(JsonPathDocBitmap *bitmap)
252+
{
253+
memset(bitmap, 0, sizeof(*bitmap));
254+
bitmap->is_list = true;
255+
}
256+
257+
static void
258+
jsonStatsBitmapAdd(JsonAnalyzeContext *cxt, JsonPathDocBitmap *bitmap, int doc)
259+
{
260+
/* Use more compact list representation if not too many bits set */
261+
if (bitmap->is_list)
262+
{
263+
int *list = bitmap->data.list;
264+
265+
#if 1 /* Enable list representation */
266+
if (bitmap->size > 0 && list[bitmap->size - 1] == doc)
267+
return;
268+
269+
if (bitmap->size < cxt->samplerows / sizeof(list[0]) / 8)
270+
{
271+
if (bitmap->size >= bitmap->allocated)
272+
{
273+
MemoryContext oldcxt = MemoryContextSwitchTo(cxt->mcxt);
274+
275+
if (bitmap->allocated)
276+
{
277+
bitmap->allocated *= 2;
278+
list = repalloc(list, sizeof(list[0]) * bitmap->allocated);
279+
}
280+
else
281+
{
282+
bitmap->allocated = 8;
283+
list = palloc(sizeof(list[0]) * bitmap->allocated);
284+
}
285+
286+
bitmap->data.list = list;
287+
288+
MemoryContextSwitchTo(oldcxt);
289+
}
290+
291+
list[bitmap->size++] = doc;
292+
return;
293+
}
294+
#endif
295+
/* convert list to bitmap */
296+
bitmap->allocated = (cxt->samplerows + 7) / 8;
297+
bitmap->data.bitmap = MemoryContextAllocZero(cxt->mcxt, bitmap->allocated);
298+
bitmap->is_list = false;
299+
300+
if (list)
301+
{
302+
for (int i = 0; i < bitmap->size; i++)
303+
{
304+
int d = list[i];
305+
306+
bitmap->data.bitmap[d / 8] |= (1 << (d % 8));
307+
}
308+
309+
pfree(list);
310+
}
311+
}
312+
313+
/* set bit in bitmap */
314+
if (doc < cxt->samplerows &&
315+
!(bitmap->data.bitmap[doc / 8] & (1 << (doc % 8))))
316+
{
317+
bitmap->data.bitmap[doc / 8] |= (1 << (doc % 8));
318+
bitmap->size++;
319+
}
320+
}
321+
322+
static bool
323+
jsonStatsBitmapNext(JsonPathDocBitmap *bitmap, int *pbit)
324+
{
325+
uint8 *bmp = bitmap->data.bitmap;
326+
uint8 *pb;
327+
uint8 *pb_end = &bmp[bitmap->allocated];
328+
int bit = *pbit;
329+
330+
Assert(!bitmap->is_list);
331+
332+
if (bit < 0)
333+
{
334+
pb = bmp;
335+
bit = 0;
336+
}
337+
else
338+
{
339+
++bit;
340+
pb = &bmp[bit / 8];
341+
bit %= 8;
342+
}
343+
344+
for (; pb < pb_end; pb++, bit = 0)
345+
{
346+
uint8 b;
347+
348+
/* Skip zero bytes */
349+
if (!bit)
350+
{
351+
while (!*pb)
352+
{
353+
if (++pb >= pb_end)
354+
return false;
355+
}
356+
}
357+
358+
b = *pb;
359+
360+
/* Skip zero bits */
361+
while (bit < 8 && !(b & (1 << bit)))
362+
bit++;
363+
364+
if (bit >= 8)
365+
continue; /* Non-zero bit not found, go to next byte */
366+
367+
/* Output next non-zero bit */
368+
*pbit = (pb - bmp) * 8 + bit;
369+
return true;
370+
}
371+
372+
return false;
373+
}
374+
230375
static void
231376
jsonStatsAnlInit(JsonPathAnlStats *stats)
232377
{
@@ -295,6 +440,8 @@ jsonAnalyzeAddPath(JsonAnalyzeContext *ctx, JsonPathEntry *parent,
295440

296441
if (ctx->single_pass)
297442
jsonStatsAnlInit((JsonPathAnlStats *) stats);
443+
else
444+
jsonStatsBitmapInit(&((JsonPathAnlDocs *) stats)->bitmap);
298445

299446
stats->depth = parent->depth + 1;
300447

@@ -437,6 +584,7 @@ jsonAnalyzeCollectPaths(JsonAnalyzeContext *ctx, Jsonb *jb, void *param)
437584
JsonbIterator *it;
438585
JsonbIteratorToken tok;
439586
JsonPathEntry *stats = &ctx->root->path;
587+
int doc = ctx->current_rownum;
440588
bool collect_values = (bool)(intptr_t) param;
441589
bool scalar = false;
442590

@@ -502,6 +650,10 @@ jsonAnalyzeCollectPaths(JsonAnalyzeContext *ctx, Jsonb *jb, void *param)
502650
jsonAnalyzeJsonValue(ctx,
503651
&((JsonPathAnlStats *) stats)->vstats,
504652
&jv);
653+
else if (stats != &ctx->root->path)
654+
jsonStatsBitmapAdd(ctx,
655+
&((JsonPathAnlDocs *) stats)->bitmap,
656+
doc);
505657

506658
/*
507659
* Manually recurse into container by creating child iterator.
@@ -1124,7 +1276,7 @@ jsonAnalyzeInit(JsonAnalyzeContext *ctx, VacAttrStats *stats,
11241276

11251277
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
11261278
hash_ctl.keysize = sizeof(JsonPathEntry);
1127-
hash_ctl.entrysize = ctx->single_pass ? sizeof(JsonPathAnlStats) : sizeof(JsonPathEntry);
1279+
hash_ctl.entrysize = ctx->single_pass ? sizeof(JsonPathAnlStats) : sizeof(JsonPathAnlDocs);
11281280
hash_ctl.hash = JsonPathEntryHash;
11291281
hash_ctl.match = JsonPathEntryMatch;
11301282
hash_ctl.hcxt = ctx->mcxt;
@@ -1146,24 +1298,23 @@ jsonAnalyzeInit(JsonAnalyzeContext *ctx, VacAttrStats *stats,
11461298
static void
11471299
jsonAnalyzePass(JsonAnalyzeContext *ctx,
11481300
void (*analyzefunc)(JsonAnalyzeContext *, Jsonb *, void *),
1149-
void *analyzearg)
1301+
void *analyzearg,
1302+
JsonPathDocBitmap *bitmap)
11501303
{
1151-
int row_num;
1152-
11531304
MemoryContext tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
11541305
"Json Analyze Pass Context",
11551306
ALLOCSET_DEFAULT_MINSIZE,
11561307
ALLOCSET_DEFAULT_INITSIZE,
11571308
ALLOCSET_DEFAULT_MAXSIZE);
1158-
11591309
MemoryContext oldcxt = MemoryContextSwitchTo(tmpcxt);
1310+
int row_num = -1;
11601311

11611312
ctx->null_cnt = 0;
11621313
ctx->analyzed_cnt = 0;
11631314
ctx->total_width = 0;
11641315

1165-
/* Loop over the arrays. */
1166-
for (row_num = 0; row_num < ctx->samplerows; row_num++)
1316+
/* Loop over the jsonbs. */
1317+
for (int i = 0; i < (bitmap ? bitmap->size : ctx->samplerows); i++)
11671318
{
11681319
Datum value;
11691320
Jsonb *jb;
@@ -1172,6 +1323,16 @@ jsonAnalyzePass(JsonAnalyzeContext *ctx,
11721323

11731324
vacuum_delay_point();
11741325

1326+
if (bitmap)
1327+
{
1328+
if (bitmap->is_list)
1329+
row_num = bitmap->data.list[i];
1330+
else if (!jsonStatsBitmapNext(bitmap, &row_num))
1331+
break;
1332+
}
1333+
else
1334+
row_num = i;
1335+
11751336
value = ctx->fetchfunc(ctx->stats, row_num, &isnull);
11761337

11771338
if (isnull)
@@ -1197,6 +1358,7 @@ jsonAnalyzePass(JsonAnalyzeContext *ctx,
11971358

11981359
MemoryContextSwitchTo(oldcxt);
11991360

1361+
ctx->current_rownum = row_num;
12001362
analyzefunc(ctx, jb, analyzearg);
12011363

12021364
oldcxt = MemoryContextSwitchTo(tmpcxt);
@@ -1229,7 +1391,7 @@ compute_json_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
12291391
if (ctx.single_pass)
12301392
{
12311393
/* Collect all values of all paths */
1232-
jsonAnalyzePass(&ctx, jsonAnalyzeCollectPaths, (void *)(intptr_t) true);
1394+
jsonAnalyzePass(&ctx, jsonAnalyzeCollectPaths, (void *)(intptr_t) true, NULL);
12331395

12341396
/*
12351397
* Now that we're done with processing the documents, we sort the paths
@@ -1277,7 +1439,7 @@ compute_json_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
12771439
oldcxt = MemoryContextSwitchTo(tmpcxt);
12781440

12791441
/* Collect all paths first without accumulating any Values, sort them */
1280-
jsonAnalyzePass(&ctx, jsonAnalyzeCollectPaths, (void *)(intptr_t) false);
1442+
jsonAnalyzePass(&ctx, jsonAnalyzeCollectPaths, (void *)(intptr_t) false, NULL);
12811443
paths = jsonAnalyzeSortPaths(&ctx, &npaths);
12821444
pstats = MemoryContextAlloc(oldcxt, sizeof(*pstats) * npaths);
12831445
stack = MemoryContextAlloc(oldcxt, sizeof(*stack) * (ctx.maxdepth + 1));
@@ -1306,7 +1468,10 @@ compute_json_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
13061468
elog(DEBUG1, "analyzing json path (%d/%d) %s",
13071469
i + 1, npaths, path->pathstr);
13081470

1309-
jsonAnalyzePass(&ctx, jsonAnalyzeCollectPath, astats);
1471+
jsonAnalyzePass(&ctx, jsonAnalyzeCollectPath, astats,
1472+
/* root has no bitmap */
1473+
i > 0 ? &((JsonPathAnlDocs *) path)->bitmap : NULL);
1474+
13101475
pstats[i] = jsonAnalyzePath(&ctx, astats,
13111476
path->depth ? &stack[path->depth - 1] : NULL);
13121477

0 commit comments

Comments
 (0)