@@ -153,6 +153,25 @@ typedef struct JsonValueStats
153
153
* (for avg. array length) */
154
154
} JsonValueStats ;
155
155
156
+ typedef struct JsonPathDocBitmap
157
+ {
158
+ bool is_list ;
159
+ int size ;
160
+ int allocated ;
161
+ union
162
+ {
163
+ int32 * list ;
164
+ uint8 * bitmap ;
165
+ } data ;
166
+ } JsonPathDocBitmap ;
167
+
168
+ /* JSON path and list of documents containing it */
169
+ typedef struct JsonPathAnlDocs
170
+ {
171
+ JsonPathEntry path ;
172
+ JsonPathDocBitmap bitmap ;
173
+ } JsonPathAnlDocs ;
174
+
156
175
/* Main structure for analyzed JSON path */
157
176
typedef struct JsonPathAnlStats
158
177
{
@@ -181,6 +200,7 @@ typedef struct JsonAnalyzeContext
181
200
double totalrows ;
182
201
double total_width ;
183
202
int samplerows ;
203
+ int current_rownum ;
184
204
int target ;
185
205
int null_cnt ;
186
206
int analyzed_cnt ;
@@ -227,6 +247,131 @@ JsonPathEntryHash(const void *key, Size keysize)
227
247
return hash ;
228
248
}
229
249
250
+ static void
251
+ jsonStatsBitmapInit (JsonPathDocBitmap * bitmap )
252
+ {
253
+ memset (bitmap , 0 , sizeof (* bitmap ));
254
+ bitmap -> is_list = true;
255
+ }
256
+
257
+ static void
258
+ jsonStatsBitmapAdd (JsonAnalyzeContext * cxt , JsonPathDocBitmap * bitmap , int doc )
259
+ {
260
+ /* Use more compact list representation if not too many bits set */
261
+ if (bitmap -> is_list )
262
+ {
263
+ int * list = bitmap -> data .list ;
264
+
265
+ #if 1 /* Enable list representation */
266
+ if (bitmap -> size > 0 && list [bitmap -> size - 1 ] == doc )
267
+ return ;
268
+
269
+ if (bitmap -> size < cxt -> samplerows / sizeof (list [0 ]) / 8 )
270
+ {
271
+ if (bitmap -> size >= bitmap -> allocated )
272
+ {
273
+ MemoryContext oldcxt = MemoryContextSwitchTo (cxt -> mcxt );
274
+
275
+ if (bitmap -> allocated )
276
+ {
277
+ bitmap -> allocated *= 2 ;
278
+ list = repalloc (list , sizeof (list [0 ]) * bitmap -> allocated );
279
+ }
280
+ else
281
+ {
282
+ bitmap -> allocated = 8 ;
283
+ list = palloc (sizeof (list [0 ]) * bitmap -> allocated );
284
+ }
285
+
286
+ bitmap -> data .list = list ;
287
+
288
+ MemoryContextSwitchTo (oldcxt );
289
+ }
290
+
291
+ list [bitmap -> size ++ ] = doc ;
292
+ return ;
293
+ }
294
+ #endif
295
+ /* convert list to bitmap */
296
+ bitmap -> allocated = (cxt -> samplerows + 7 ) / 8 ;
297
+ bitmap -> data .bitmap = MemoryContextAllocZero (cxt -> mcxt , bitmap -> allocated );
298
+ bitmap -> is_list = false;
299
+
300
+ if (list )
301
+ {
302
+ for (int i = 0 ; i < bitmap -> size ; i ++ )
303
+ {
304
+ int d = list [i ];
305
+
306
+ bitmap -> data .bitmap [d / 8 ] |= (1 << (d % 8 ));
307
+ }
308
+
309
+ pfree (list );
310
+ }
311
+ }
312
+
313
+ /* set bit in bitmap */
314
+ if (doc < cxt -> samplerows &&
315
+ !(bitmap -> data .bitmap [doc / 8 ] & (1 << (doc % 8 ))))
316
+ {
317
+ bitmap -> data .bitmap [doc / 8 ] |= (1 << (doc % 8 ));
318
+ bitmap -> size ++ ;
319
+ }
320
+ }
321
+
322
+ static bool
323
+ jsonStatsBitmapNext (JsonPathDocBitmap * bitmap , int * pbit )
324
+ {
325
+ uint8 * bmp = bitmap -> data .bitmap ;
326
+ uint8 * pb ;
327
+ uint8 * pb_end = & bmp [bitmap -> allocated ];
328
+ int bit = * pbit ;
329
+
330
+ Assert (!bitmap -> is_list );
331
+
332
+ if (bit < 0 )
333
+ {
334
+ pb = bmp ;
335
+ bit = 0 ;
336
+ }
337
+ else
338
+ {
339
+ ++ bit ;
340
+ pb = & bmp [bit / 8 ];
341
+ bit %= 8 ;
342
+ }
343
+
344
+ for (; pb < pb_end ; pb ++ , bit = 0 )
345
+ {
346
+ uint8 b ;
347
+
348
+ /* Skip zero bytes */
349
+ if (!bit )
350
+ {
351
+ while (!* pb )
352
+ {
353
+ if (++ pb >= pb_end )
354
+ return false;
355
+ }
356
+ }
357
+
358
+ b = * pb ;
359
+
360
+ /* Skip zero bits */
361
+ while (bit < 8 && !(b & (1 << bit )))
362
+ bit ++ ;
363
+
364
+ if (bit >= 8 )
365
+ continue ; /* Non-zero bit not found, go to next byte */
366
+
367
+ /* Output next non-zero bit */
368
+ * pbit = (pb - bmp ) * 8 + bit ;
369
+ return true;
370
+ }
371
+
372
+ return false;
373
+ }
374
+
230
375
static void
231
376
jsonStatsAnlInit (JsonPathAnlStats * stats )
232
377
{
@@ -295,6 +440,8 @@ jsonAnalyzeAddPath(JsonAnalyzeContext *ctx, JsonPathEntry *parent,
295
440
296
441
if (ctx -> single_pass )
297
442
jsonStatsAnlInit ((JsonPathAnlStats * ) stats );
443
+ else
444
+ jsonStatsBitmapInit (& ((JsonPathAnlDocs * ) stats )-> bitmap );
298
445
299
446
stats -> depth = parent -> depth + 1 ;
300
447
@@ -437,6 +584,7 @@ jsonAnalyzeCollectPaths(JsonAnalyzeContext *ctx, Jsonb *jb, void *param)
437
584
JsonbIterator * it ;
438
585
JsonbIteratorToken tok ;
439
586
JsonPathEntry * stats = & ctx -> root -> path ;
587
+ int doc = ctx -> current_rownum ;
440
588
bool collect_values = (bool )(intptr_t ) param ;
441
589
bool scalar = false;
442
590
@@ -502,6 +650,10 @@ jsonAnalyzeCollectPaths(JsonAnalyzeContext *ctx, Jsonb *jb, void *param)
502
650
jsonAnalyzeJsonValue (ctx ,
503
651
& ((JsonPathAnlStats * ) stats )-> vstats ,
504
652
& jv );
653
+ else if (stats != & ctx -> root -> path )
654
+ jsonStatsBitmapAdd (ctx ,
655
+ & ((JsonPathAnlDocs * ) stats )-> bitmap ,
656
+ doc );
505
657
506
658
/*
507
659
* Manually recurse into container by creating child iterator.
@@ -1124,7 +1276,7 @@ jsonAnalyzeInit(JsonAnalyzeContext *ctx, VacAttrStats *stats,
1124
1276
1125
1277
MemSet (& hash_ctl , 0 , sizeof (hash_ctl ));
1126
1278
hash_ctl .keysize = sizeof (JsonPathEntry );
1127
- hash_ctl .entrysize = ctx -> single_pass ? sizeof (JsonPathAnlStats ) : sizeof (JsonPathEntry );
1279
+ hash_ctl .entrysize = ctx -> single_pass ? sizeof (JsonPathAnlStats ) : sizeof (JsonPathAnlDocs );
1128
1280
hash_ctl .hash = JsonPathEntryHash ;
1129
1281
hash_ctl .match = JsonPathEntryMatch ;
1130
1282
hash_ctl .hcxt = ctx -> mcxt ;
@@ -1146,24 +1298,23 @@ jsonAnalyzeInit(JsonAnalyzeContext *ctx, VacAttrStats *stats,
1146
1298
static void
1147
1299
jsonAnalyzePass (JsonAnalyzeContext * ctx ,
1148
1300
void (* analyzefunc )(JsonAnalyzeContext * , Jsonb * , void * ),
1149
- void * analyzearg )
1301
+ void * analyzearg ,
1302
+ JsonPathDocBitmap * bitmap )
1150
1303
{
1151
- int row_num ;
1152
-
1153
1304
MemoryContext tmpcxt = AllocSetContextCreate (CurrentMemoryContext ,
1154
1305
"Json Analyze Pass Context" ,
1155
1306
ALLOCSET_DEFAULT_MINSIZE ,
1156
1307
ALLOCSET_DEFAULT_INITSIZE ,
1157
1308
ALLOCSET_DEFAULT_MAXSIZE );
1158
-
1159
1309
MemoryContext oldcxt = MemoryContextSwitchTo (tmpcxt );
1310
+ int row_num = -1 ;
1160
1311
1161
1312
ctx -> null_cnt = 0 ;
1162
1313
ctx -> analyzed_cnt = 0 ;
1163
1314
ctx -> total_width = 0 ;
1164
1315
1165
- /* Loop over the arrays . */
1166
- for (row_num = 0 ; row_num < ctx -> samplerows ; row_num ++ )
1316
+ /* Loop over the jsonbs . */
1317
+ for (int i = 0 ; i < ( bitmap ? bitmap -> size : ctx -> samplerows ); i ++ )
1167
1318
{
1168
1319
Datum value ;
1169
1320
Jsonb * jb ;
@@ -1172,6 +1323,16 @@ jsonAnalyzePass(JsonAnalyzeContext *ctx,
1172
1323
1173
1324
vacuum_delay_point ();
1174
1325
1326
+ if (bitmap )
1327
+ {
1328
+ if (bitmap -> is_list )
1329
+ row_num = bitmap -> data .list [i ];
1330
+ else if (!jsonStatsBitmapNext (bitmap , & row_num ))
1331
+ break ;
1332
+ }
1333
+ else
1334
+ row_num = i ;
1335
+
1175
1336
value = ctx -> fetchfunc (ctx -> stats , row_num , & isnull );
1176
1337
1177
1338
if (isnull )
@@ -1197,6 +1358,7 @@ jsonAnalyzePass(JsonAnalyzeContext *ctx,
1197
1358
1198
1359
MemoryContextSwitchTo (oldcxt );
1199
1360
1361
+ ctx -> current_rownum = row_num ;
1200
1362
analyzefunc (ctx , jb , analyzearg );
1201
1363
1202
1364
oldcxt = MemoryContextSwitchTo (tmpcxt );
@@ -1229,7 +1391,7 @@ compute_json_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1229
1391
if (ctx .single_pass )
1230
1392
{
1231
1393
/* Collect all values of all paths */
1232
- jsonAnalyzePass (& ctx , jsonAnalyzeCollectPaths , (void * )(intptr_t ) true);
1394
+ jsonAnalyzePass (& ctx , jsonAnalyzeCollectPaths , (void * )(intptr_t ) true, NULL );
1233
1395
1234
1396
/*
1235
1397
* Now that we're done with processing the documents, we sort the paths
@@ -1277,7 +1439,7 @@ compute_json_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1277
1439
oldcxt = MemoryContextSwitchTo (tmpcxt );
1278
1440
1279
1441
/* Collect all paths first without accumulating any Values, sort them */
1280
- jsonAnalyzePass (& ctx , jsonAnalyzeCollectPaths , (void * )(intptr_t ) false);
1442
+ jsonAnalyzePass (& ctx , jsonAnalyzeCollectPaths , (void * )(intptr_t ) false, NULL );
1281
1443
paths = jsonAnalyzeSortPaths (& ctx , & npaths );
1282
1444
pstats = MemoryContextAlloc (oldcxt , sizeof (* pstats ) * npaths );
1283
1445
stack = MemoryContextAlloc (oldcxt , sizeof (* stack ) * (ctx .maxdepth + 1 ));
@@ -1306,7 +1468,10 @@ compute_json_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1306
1468
elog (DEBUG1 , "analyzing json path (%d/%d) %s" ,
1307
1469
i + 1 , npaths , path -> pathstr );
1308
1470
1309
- jsonAnalyzePass (& ctx , jsonAnalyzeCollectPath , astats );
1471
+ jsonAnalyzePass (& ctx , jsonAnalyzeCollectPath , astats ,
1472
+ /* root has no bitmap */
1473
+ i > 0 ? & ((JsonPathAnlDocs * ) path )-> bitmap : NULL );
1474
+
1310
1475
pstats [i ] = jsonAnalyzePath (& ctx , astats ,
1311
1476
path -> depth ? & stack [path -> depth - 1 ] : NULL );
1312
1477
0 commit comments