Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 7b8069b

Browse files
author
Nikita Glukhov
committed
Sort jsonb object values by length
1 parent 4eaf89f commit 7b8069b

File tree

3 files changed

+184
-36
lines changed

3 files changed

+184
-36
lines changed

src/backend/utils/adt/jsonb_op.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,10 @@ json_hash_internal(FunctionCallInfo fcinfo, bool is_jsonb)
324324
{
325325
/* Rotation is left to JsonbHashScalarValue() */
326326
case WJB_BEGIN_ARRAY:
327-
hash ^= JB_FARRAY;
327+
hash ^= JB_TARRAY;
328328
break;
329329
case WJB_BEGIN_OBJECT:
330-
hash ^= JB_FOBJECT;
330+
hash ^= JB_TOBJECT;
331331
break;
332332
case WJB_KEY:
333333
case WJB_VALUE:
@@ -382,10 +382,10 @@ json_hash_extended_internal(FunctionCallInfo fcinfo, bool is_jsonb)
382382
{
383383
/* Rotation is left to JsonbHashScalarValueExtended() */
384384
case WJB_BEGIN_ARRAY:
385-
hash ^= ((uint64) JB_FARRAY) << 32 | JB_FARRAY;
385+
hash ^= ((uint64) JB_TARRAY) << 32 | JB_TARRAY;
386386
break;
387387
case WJB_BEGIN_OBJECT:
388-
hash ^= ((uint64) JB_FOBJECT) << 32 | JB_FOBJECT;
388+
hash ^= ((uint64) JB_TOBJECT) << 32 | JB_TOBJECT;
389389
break;
390390
case WJB_KEY:
391391
case WJB_VALUE:

src/backend/utils/adt/jsonb_util.c

Lines changed: 162 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "utils/memutils.h"
3232
#include "utils/varlena.h"
3333

34+
/*
 * Compile-time switch for the "sorted values" object layout.  When nonzero,
 * convertJsonbObject() considers objects with more than one pair for
 * reordering: it estimates each value's size, sorts the values by that
 * estimate, and (if the order actually changed) writes the object with the
 * JB_TOBJECT_SORTED header type plus a key-to-value index map.
 */
#define JSONB_SORTED_VALUES 1
35+
3436
/*
3537
* Maximum number of elements in an array (or key/value pairs in an object).
3638
* This is limited by two things: the size of the JEntry array must fit
@@ -81,6 +83,7 @@ struct JsonbIterator
8183
const JEntry *children; /* JEntrys for child nodes */
8284
/* Data proper. This points to the beginning of the variable-length data */
8385
char *dataProper;
86+
uint32 *kvMap;
8487

8588
/* Current item in buffer (up to nElems) */
8689
int curIndex;
@@ -561,6 +564,8 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
561564
const JEntry *children = container->children;
562565
int count = JsonContainerSize(jsc);
563566
char *baseAddr;
567+
bool sorted_values = (container->header & JB_TMASK) == JB_TOBJECT_SORTED;
568+
const uint32 *kvmap;
564569
uint32 stopLow,
565570
stopHigh;
566571

@@ -574,7 +579,16 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
574579
* Binary search the container. Since we know this is an object, account
575580
* for *Pairs* of Jentrys
576581
*/
577-
baseAddr = (char *) (children + count * 2);
582+
if (sorted_values)
583+
{
584+
kvmap = &children[count * 2];
585+
baseAddr = (char *) &kvmap[count];
586+
}
587+
else
588+
{
589+
kvmap = NULL;
590+
baseAddr = (char *) (children + count * 2);
591+
}
578592
stopLow = 0;
579593
stopHigh = count;
580594
while (stopLow < stopHigh)
@@ -595,7 +609,7 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
595609
if (difference == 0)
596610
{
597611
/* Found our key, return corresponding value */
598-
int index = stopMiddle + count;
612+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
599613

600614
if (!res)
601615
res = palloc(sizeof(JsonbValue));
@@ -1197,14 +1211,18 @@ JsonbIteratorNext(JsonIterator **jsit, JsonbValue *val, bool skipNested)
11971211
(*it)->state = JBI_OBJECT_KEY;
11981212

11991213
fillCompressedJsonbValue((*it)->compressed, (*it)->container,
1200-
(*it)->curIndex + (*it)->nElems,
1201-
(*it)->dataProper, (*it)->curValueOffset,
1214+
((*it)->kvMap ? (*it)->kvMap[(*it)->curIndex] : (*it)->curIndex) + (*it)->nElems,
1215+
(*it)->dataProper,
1216+
(*it)->kvMap ?
1217+
getJsonbOffset((*it)->container, (*it)->kvMap[(*it)->curIndex] + (*it)->nElems) :
1218+
(*it)->curValueOffset,
12021219
val);
12031220

12041221
JBE_ADVANCE_OFFSET((*it)->curDataOffset,
12051222
(*it)->children[(*it)->curIndex]);
1206-
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1207-
(*it)->children[(*it)->curIndex + (*it)->nElems]);
1223+
if (!(*it)->kvMap)
1224+
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1225+
(*it)->children[(*it)->curIndex + (*it)->nElems]);
12081226
(*it)->curIndex++;
12091227

12101228
/*
@@ -1256,24 +1274,34 @@ jsonbIteratorInit(JsonContainer *cont, const JsonbContainer *container,
12561274
/* Array starts just after header */
12571275
it->children = container->children;
12581276

1259-
switch (container->header & (JB_FARRAY | JB_FOBJECT))
1277+
switch (container->header & JB_TMASK)
12601278
{
1261-
case JB_FARRAY:
1279+
case JB_TSCALAR:
1280+
it->isScalar = true;
1281+
/* FALLTHROUGH */
1282+
case JB_TARRAY:
12621283
it->dataProper =
12631284
(char *) it->children + it->nElems * sizeof(JEntry);
1264-
it->isScalar = (container->header & JB_FSCALAR) != 0;
12651285
/* This is either a "raw scalar", or an array */
12661286
Assert(!it->isScalar || it->nElems == 1);
12671287

12681288
it->state = JBI_ARRAY_START;
12691289
break;
12701290

1271-
case JB_FOBJECT:
1291+
case JB_TOBJECT:
1292+
it->kvMap = NULL;
12721293
it->dataProper =
12731294
(char *) it->children + it->nElems * sizeof(JEntry) * 2;
12741295
it->state = JBI_OBJECT_START;
12751296
break;
12761297

1298+
case JB_TOBJECT_SORTED:
1299+
it->kvMap = (uint32 *)
1300+
((char *) it->children + it->nElems * sizeof(JEntry) * 2);
1301+
it->dataProper = (char *) &it->kvMap[it->nElems];
1302+
it->state = JBI_OBJECT_START;
1303+
break;
1304+
12771305
default:
12781306
elog(ERROR, "unknown type of jsonb container");
12791307
}
@@ -1877,13 +1905,14 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
18771905
* Construct the header Jentry and store it in the beginning of the
18781906
* variable-length payload.
18791907
*/
1880-
header = nElems | JB_FARRAY;
18811908
if (val->val.array.rawScalar)
18821909
{
18831910
Assert(nElems == 1);
18841911
Assert(level == 0);
1885-
header |= JB_FSCALAR;
1912+
header = nElems | JB_TSCALAR;
18861913
}
1914+
else
1915+
header = nElems | JB_TARRAY;
18871916

18881917
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
18891918

@@ -1941,6 +1970,48 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
19411970
*pheader = JENTRY_ISCONTAINER | totallen;
19421971
}
19431972

1973+
/*
 * qsort() comparator that orders elements by the int found at the very start
 * of each element.  convertJsonbObject() uses it on an array of
 * {size, index} structs, so only the leading "size" member takes part in the
 * comparison.
 *
 * Returns -1, 0 or 1 when the first int is less than, equal to, or greater
 * than the second one.
 */
static int
int_cmp(const void *a, const void *b)
{
	const int	lhs = *(const int *) a;
	const int	rhs = *(const int *) b;

	if (lhs > rhs)
		return 1;
	if (lhs < rhs)
		return -1;
	return 0;
}
1981+
1982+
static int
1983+
estimateJsonbValueSize(const JsonbValue *jbv)
1984+
{
1985+
int size;
1986+
1987+
switch (jbv->type)
1988+
{
1989+
case jbvNull:
1990+
case jbvBool:
1991+
return 0;
1992+
case jbvString:
1993+
return jbv->val.string.len;
1994+
case jbvNumeric:
1995+
return VARSIZE_ANY(jbv->val.numeric);
1996+
case jbvArray:
1997+
size = offsetof(JsonbContainer, children[jbv->val.array.nElems]);
1998+
for (int i = 0; i < jbv->val.array.nElems; i++)
1999+
size += estimateJsonbValueSize(&jbv->val.array.elems[i]);
2000+
return size;
2001+
case jbvObject:
2002+
size = offsetof(JsonbContainer, children[jbv->val.object.nPairs * 2]);
2003+
for (int i = 0; i < jbv->val.object.nPairs; i++)
2004+
{
2005+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].key);
2006+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].value);
2007+
}
2008+
return size;
2009+
default:
2010+
elog(ERROR, "invalid jsonb value type: %d", jbv->type);
2011+
return 0;
2012+
}
2013+
}
2014+
19442015
static void
19452016
convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int level)
19462017
{
@@ -1950,9 +2021,39 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19502021
int totallen;
19512022
uint32 header;
19522023
int nPairs = val->val.object.nPairs;
2024+
int reserved_size;
2025+
bool sorted_values = JSONB_SORTED_VALUES && nPairs > 1;
2026+
struct
2027+
{
2028+
int size;
2029+
int32 index;
2030+
} *values = sorted_values ? palloc(sizeof(*values) * nPairs) : NULL;
19532031

19542032
Assert(nPairs >= 0);
19552033

2034+
if (sorted_values)
2035+
{
2036+
for (i = 0; i < nPairs; i++)
2037+
{
2038+
values[i].index = i;
2039+
values[i].size = estimateJsonbValueSize(&val->val.object.pairs[i].value);
2040+
}
2041+
2042+
qsort(values, nPairs, sizeof(*values), int_cmp);
2043+
2044+
/* check whether sorting actually changed the order of the values */
2045+
sorted_values = false;
2046+
2047+
for (i = 0; i < nPairs; i++)
2048+
{
2049+
if (values[i].index != i)
2050+
{
2051+
sorted_values = true;
2052+
break;
2053+
}
2054+
}
2055+
}
2056+
19562057
/* Remember where in the buffer this object starts. */
19572058
base_offset = buffer->len;
19582059

@@ -1963,17 +2064,30 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19632064
* Construct the header Jentry and store it in the beginning of the
19642065
* variable-length payload.
19652066
*/
1966-
header = nPairs | JB_FOBJECT;
2067+
header = nPairs | (sorted_values ? JB_TOBJECT_SORTED : JB_TOBJECT);
19672068
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
19682069

19692070
/* Reserve space for the JEntries of the keys and values. */
1970-
jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
2071+
reserved_size = sizeof(JEntry) * nPairs * 2;
2072+
if (sorted_values)
2073+
reserved_size += sizeof(int32) * nPairs;
2074+
2075+
jentry_offset = reserveFromBuffer(buffer, reserved_size);
2076+
2077+
/* Write key-value map */
2078+
if (sorted_values)
2079+
{
2080+
for (i = 0; i < nPairs; i++)
2081+
copyToBuffer(buffer, jentry_offset + sizeof(JEntry) * nPairs * 2 + values[i].index * sizeof(int32),
2082+
&i, sizeof(int32));
2083+
}
19712084

19722085
/*
19732086
* Iterate over the keys, then over the values, since that is the ordering
19742087
* we want in the on-disk representation.
19752088
*/
19762089
totallen = 0;
2090+
19772091
for (i = 0; i < nPairs; i++)
19782092
{
19792093
JsonbPair *pair = &val->val.object.pairs[i];
@@ -2009,9 +2123,11 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
20092123
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
20102124
jentry_offset += sizeof(JEntry);
20112125
}
2126+
20122127
for (i = 0; i < nPairs; i++)
20132128
{
2014-
JsonbPair *pair = &val->val.object.pairs[i];
2129+
int val_index = sorted_values ? values[i].index : i;
2130+
JsonbPair *pair = &val->val.object.pairs[val_index];
20152131
int len;
20162132
JEntry meta;
20172133

@@ -2045,6 +2161,9 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
20452161
jentry_offset += sizeof(JEntry);
20462162
}
20472163

2164+
if (values)
2165+
pfree(values);
2166+
20482167
/* Total data size is everything we've appended to buffer */
20492168
totallen = buffer->len - base_offset;
20502169

@@ -2339,16 +2458,35 @@ JsonUniquify(Json *json)
23392458
return json;
23402459
}
23412460

2461+
static void
2462+
jsonbInitContainerFromHeader(JsonContainerData *jc, JsonbContainer *jbc)
2463+
{
2464+
jc->size = jbc->header & JB_CMASK;
2465+
switch (jbc->header & JB_TMASK)
2466+
{
2467+
case JB_TOBJECT:
2468+
case JB_TOBJECT_SORTED:
2469+
jc->type = jbvObject;
2470+
break;
2471+
case JB_TARRAY:
2472+
jc->type = jbvArray;
2473+
break;
2474+
case JB_TSCALAR:
2475+
jc->type = jbvArray | jbvScalar;
2476+
break;
2477+
default:
2478+
elog(ERROR, "invalid jsonb container type: %d",
2479+
jbc->header & JB_TMASK);
2480+
}
2481+
}
2482+
23422483
static void
23432484
jsonbInitContainer(JsonContainerData *jc, JsonbContainer *jbc, int len)
23442485
{
23452486
jc->ops = &jsonbContainerOps;
23462487
JsonContainerDataPtr(jc) = jbc;
23472488
jc->len = len;
2348-
jc->size = jbc->header & JB_CMASK;
2349-
jc->type = jbc->header & JB_FOBJECT ? jbvObject :
2350-
jbc->header & JB_FSCALAR ? jbvArray | jbvScalar :
2351-
jbvArray;
2489+
jsonbInitContainerFromHeader(jc, jbc);
23522490
}
23532491

23542492
static void
@@ -2462,10 +2600,7 @@ jsonbzInitContainer(JsonContainerData *jc, CompressedJsonb *cjb, int len)
24622600

24632601
jc->ops = &jsonbzContainerOps;
24642602
jc->len = len;
2465-
jc->size = jbc->header & JB_CMASK;
2466-
jc->type = jbc->header & JB_FOBJECT ? jbvObject :
2467-
jbc->header & JB_FSCALAR ? jbvArray | jbvScalar :
2468-
jbvArray;
2603+
jsonbInitContainerFromHeader(jc, jbc);
24692604
}
24702605

24712606
static JsonbContainer *
@@ -2533,7 +2668,9 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
25332668
JEntry *children = container->children;
25342669
int count = container->header & JB_CMASK;
25352670
/* Since this is an object, account for *Pairs* of Jentrys */
2536-
char *base_addr = (char *) (children + count * 2);
2671+
bool sorted_values = (container->header & JB_TMASK) == JB_TOBJECT_SORTED;
2672+
char *base_addr = (char *) (children + count * 2) + (sorted_values ? sizeof(uint32) * count : 0);
2673+
uint32 *kvmap = sorted_values ? &container->children[count * 2] : NULL;
25372674
Size base_offset = base_addr - (char *) jb;
25382675
uint32 stopLow = 0,
25392676
stopHigh = count;
@@ -2575,7 +2712,7 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
25752712
if (difference == 0)
25762713
{
25772714
/* Found our key, return corresponding value */
2578-
int index = stopMiddle + count;
2715+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
25792716

25802717
return fillCompressedJsonbValue(cjb, container, index, base_addr,
25812718
getJsonbOffset(container, index),

0 commit comments

Comments
 (0)