Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit cd87628

Browse files
author
Nikita Glukhov
committed
Sort jsonb object values by length
1 parent cc64c7f commit cd87628

File tree

3 files changed

+184
-36
lines changed

3 files changed

+184
-36
lines changed

src/backend/utils/adt/jsonb_op.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,10 @@ json_hash_internal(FunctionCallInfo fcinfo, bool is_jsonb)
324324
{
325325
/* Rotation is left to JsonbHashScalarValue() */
326326
case WJB_BEGIN_ARRAY:
327-
hash ^= JB_FARRAY;
327+
hash ^= JB_TARRAY;
328328
break;
329329
case WJB_BEGIN_OBJECT:
330-
hash ^= JB_FOBJECT;
330+
hash ^= JB_TOBJECT;
331331
break;
332332
case WJB_KEY:
333333
case WJB_VALUE:
@@ -382,10 +382,10 @@ json_hash_extended_internal(FunctionCallInfo fcinfo, bool is_jsonb)
382382
{
383383
/* Rotation is left to JsonbHashScalarValueExtended() */
384384
case WJB_BEGIN_ARRAY:
385-
hash ^= ((uint64) JB_FARRAY) << 32 | JB_FARRAY;
385+
hash ^= ((uint64) JB_TARRAY) << 32 | JB_TARRAY;
386386
break;
387387
case WJB_BEGIN_OBJECT:
388-
hash ^= ((uint64) JB_FOBJECT) << 32 | JB_FOBJECT;
388+
hash ^= ((uint64) JB_TOBJECT) << 32 | JB_TOBJECT;
389389
break;
390390
case WJB_KEY:
391391
case WJB_VALUE:

src/backend/utils/adt/jsonb_util.c

+162-25
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "utils/memutils.h"
3232
#include "utils/varlena.h"
3333

34+
#define JSONB_SORTED_VALUES 1
35+
3436
/*
3537
* Maximum number of elements in an array (or key/value pairs in an object).
3638
* This is limited by two things: the size of the JEntry array must fit
@@ -81,6 +83,7 @@ struct JsonbIterator
8183
const JEntry *children; /* JEntrys for child nodes */
8284
/* Data proper. This points to the beginning of the variable-length data */
8385
char *dataProper;
86+
uint32 *kvMap;
8487

8588
/* Current item in buffer (up to nElems) */
8689
int curIndex;
@@ -562,6 +565,8 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
562565
const JEntry *children = container->children;
563566
int count = JsonContainerSize(jsc);
564567
char *baseAddr;
568+
bool sorted_values = (container->header & JB_TMASK) == JB_TOBJECT_SORTED;
569+
const uint32 *kvmap;
565570
uint32 stopLow,
566571
stopHigh;
567572

@@ -575,7 +580,16 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
575580
* Binary search the container. Since we know this is an object, account
576581
* for *Pairs* of Jentrys
577582
*/
578-
baseAddr = (char *) (children + count * 2);
583+
if (sorted_values)
584+
{
585+
kvmap = &children[count * 2];
586+
baseAddr = (char *) &kvmap[count];
587+
}
588+
else
589+
{
590+
kvmap = NULL;
591+
baseAddr = (char *) (children + count * 2);
592+
}
579593
stopLow = 0;
580594
stopHigh = count;
581595
while (stopLow < stopHigh)
@@ -596,7 +610,7 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
596610
if (difference == 0)
597611
{
598612
/* Found our key, return corresponding value */
599-
int index = stopMiddle + count;
613+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
600614

601615
if (!res)
602616
res = palloc(sizeof(JsonbValue));
@@ -1134,14 +1148,18 @@ JsonbIteratorNext(JsonIterator **jsit, JsonbValue *val, bool skipNested)
11341148
(*it)->state = JBI_OBJECT_KEY;
11351149

11361150
fillCompressedJsonbValue((*it)->compressed, (*it)->container,
1137-
(*it)->curIndex + (*it)->nElems,
1138-
(*it)->dataProper, (*it)->curValueOffset,
1151+
((*it)->kvMap ? (*it)->kvMap[(*it)->curIndex] : (*it)->curIndex) + (*it)->nElems,
1152+
(*it)->dataProper,
1153+
(*it)->kvMap ?
1154+
getJsonbOffset((*it)->container, (*it)->kvMap[(*it)->curIndex] + (*it)->nElems) :
1155+
(*it)->curValueOffset,
11391156
val);
11401157

11411158
JBE_ADVANCE_OFFSET((*it)->curDataOffset,
11421159
(*it)->children[(*it)->curIndex]);
1143-
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1144-
(*it)->children[(*it)->curIndex + (*it)->nElems]);
1160+
if (!(*it)->kvMap)
1161+
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1162+
(*it)->children[(*it)->curIndex + (*it)->nElems]);
11451163
(*it)->curIndex++;
11461164

11471165
/*
@@ -1193,24 +1211,34 @@ jsonbIteratorInit(JsonContainer *cont, const JsonbContainer *container,
11931211
/* Array starts just after header */
11941212
it->children = container->children;
11951213

1196-
switch (container->header & (JB_FARRAY | JB_FOBJECT))
1214+
switch (container->header & JB_TMASK)
11971215
{
1198-
case JB_FARRAY:
1216+
case JB_TSCALAR:
1217+
it->isScalar = true;
1218+
/* FALLTHROUGH */
1219+
case JB_TARRAY:
11991220
it->dataProper =
12001221
(char *) it->children + it->nElems * sizeof(JEntry);
1201-
it->isScalar = (container->header & JB_FSCALAR) != 0;
12021222
/* This is either a "raw scalar", or an array */
12031223
Assert(!it->isScalar || it->nElems == 1);
12041224

12051225
it->state = JBI_ARRAY_START;
12061226
break;
12071227

1208-
case JB_FOBJECT:
1228+
case JB_TOBJECT:
1229+
it->kvMap = NULL;
12091230
it->dataProper =
12101231
(char *) it->children + it->nElems * sizeof(JEntry) * 2;
12111232
it->state = JBI_OBJECT_START;
12121233
break;
12131234

1235+
case JB_TOBJECT_SORTED:
1236+
it->kvMap = (uint32 *)
1237+
((char *) it->children + it->nElems * sizeof(JEntry) * 2);
1238+
it->dataProper = (char *) &it->kvMap[it->nElems];
1239+
it->state = JBI_OBJECT_START;
1240+
break;
1241+
12141242
default:
12151243
elog(ERROR, "unknown type of jsonb container");
12161244
}
@@ -1814,13 +1842,14 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
18141842
* Construct the header Jentry and store it in the beginning of the
18151843
* variable-length payload.
18161844
*/
1817-
header = nElems | JB_FARRAY;
18181845
if (val->val.array.rawScalar)
18191846
{
18201847
Assert(nElems == 1);
18211848
Assert(level == 0);
1822-
header |= JB_FSCALAR;
1849+
header = nElems | JB_TSCALAR;
18231850
}
1851+
else
1852+
header = nElems | JB_TARRAY;
18241853

18251854
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
18261855

@@ -1878,6 +1907,48 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
18781907
*pheader = JENTRY_ISCONTAINER | totallen;
18791908
}
18801909

1910+
/*
 * int_cmp
 *		qsort()-style comparator ordering elements by their leading int.
 *
 * Only the first int at each pointer is read, so the elements may be larger
 * objects whose first member is an int (convertJsonbObject sorts its
 * size/index records this way).  Returns -1, 0 or 1; elements with equal
 * leading ints compare as equal, so their relative order is left to qsort().
 */
static int
int_cmp(const void *a, const void *b)
{
	const int	lhs = *(const int *) a;
	const int	rhs = *(const int *) b;

	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;
	return 0;
}
1918+
1919+
static int
1920+
estimateJsonbValueSize(const JsonbValue *jbv)
1921+
{
1922+
int size;
1923+
1924+
switch (jbv->type)
1925+
{
1926+
case jbvNull:
1927+
case jbvBool:
1928+
return 0;
1929+
case jbvString:
1930+
return jbv->val.string.len;
1931+
case jbvNumeric:
1932+
return VARSIZE_ANY(jbv->val.numeric);
1933+
case jbvArray:
1934+
size = offsetof(JsonbContainer, children[jbv->val.array.nElems]);
1935+
for (int i = 0; i < jbv->val.array.nElems; i++)
1936+
size += estimateJsonbValueSize(&jbv->val.array.elems[i]);
1937+
return size;
1938+
case jbvObject:
1939+
size = offsetof(JsonbContainer, children[jbv->val.object.nPairs * 2]);
1940+
for (int i = 0; i < jbv->val.object.nPairs; i++)
1941+
{
1942+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].key);
1943+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].value);
1944+
}
1945+
return size;
1946+
default:
1947+
elog(ERROR, "invalid jsonb value type: %d", jbv->type);
1948+
return 0;
1949+
}
1950+
}
1951+
18811952
static void
18821953
convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int level)
18831954
{
@@ -1887,9 +1958,39 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
18871958
int totallen;
18881959
uint32 header;
18891960
int nPairs = val->val.object.nPairs;
1961+
int reserved_size;
1962+
bool sorted_values = JSONB_SORTED_VALUES && nPairs > 1;
1963+
struct
1964+
{
1965+
int size;
1966+
int32 index;
1967+
} *values = sorted_values ? palloc(sizeof(*values) * nPairs) : NULL;
18901968

18911969
Assert(nPairs >= 0);
18921970

1971+
if (sorted_values)
1972+
{
1973+
for (i = 0; i < nPairs; i++)
1974+
{
1975+
values[i].index = i;
1976+
values[i].size = estimateJsonbValueSize(&val->val.object.pairs[i].value);
1977+
}
1978+
1979+
qsort(values, nPairs, sizeof(*values), int_cmp);
1980+
1981+
/* check if keys were really moved */
1982+
sorted_values = false;
1983+
1984+
for (i = 0; i < nPairs; i++)
1985+
{
1986+
if (values[i].index != i)
1987+
{
1988+
sorted_values = true;
1989+
break;
1990+
}
1991+
}
1992+
}
1993+
18931994
/* Remember where in the buffer this object starts. */
18941995
base_offset = buffer->len;
18951996

@@ -1900,17 +2001,30 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19002001
* Construct the header Jentry and store it in the beginning of the
19012002
* variable-length payload.
19022003
*/
1903-
header = nPairs | JB_FOBJECT;
2004+
header = nPairs | (sorted_values ? JB_TOBJECT_SORTED : JB_TOBJECT);
19042005
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
19052006

19062007
/* Reserve space for the JEntries of the keys and values. */
1907-
jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
2008+
reserved_size = sizeof(JEntry) * nPairs * 2;
2009+
if (sorted_values)
2010+
reserved_size += sizeof(int32) * nPairs;
2011+
2012+
jentry_offset = reserveFromBuffer(buffer, reserved_size);
2013+
2014+
/* Write key-value map */
2015+
if (sorted_values)
2016+
{
2017+
for (i = 0; i < nPairs; i++)
2018+
copyToBuffer(buffer, jentry_offset + sizeof(JEntry) * nPairs * 2 + values[i].index * sizeof(int32),
2019+
&i, sizeof(int32));
2020+
}
19082021

19092022
/*
19102023
* Iterate over the keys, then over the values, since that is the ordering
19112024
* we want in the on-disk representation.
19122025
*/
19132026
totallen = 0;
2027+
19142028
for (i = 0; i < nPairs; i++)
19152029
{
19162030
JsonbPair *pair = &val->val.object.pairs[i];
@@ -1946,9 +2060,11 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19462060
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
19472061
jentry_offset += sizeof(JEntry);
19482062
}
2063+
19492064
for (i = 0; i < nPairs; i++)
19502065
{
1951-
JsonbPair *pair = &val->val.object.pairs[i];
2066+
int val_index = sorted_values ? values[i].index : i;
2067+
JsonbPair *pair = &val->val.object.pairs[val_index];
19522068
int len;
19532069
JEntry meta;
19542070

@@ -1982,6 +2098,9 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19822098
jentry_offset += sizeof(JEntry);
19832099
}
19842100

2101+
if (values)
2102+
pfree(values);
2103+
19852104
/* Total data size is everything we've appended to buffer */
19862105
totallen = buffer->len - base_offset;
19872106

@@ -2276,16 +2395,35 @@ JsonUniquify(Json *json)
22762395
return json;
22772396
}
22782397

2398+
static void
2399+
jsonbInitContainerFromHeader(JsonContainerData *jc, JsonbContainer *jbc)
2400+
{
2401+
jc->size = jbc->header & JB_CMASK;
2402+
switch (jbc->header & JB_TMASK)
2403+
{
2404+
case JB_TOBJECT:
2405+
case JB_TOBJECT_SORTED:
2406+
jc->type = jbvObject;
2407+
break;
2408+
case JB_TARRAY:
2409+
jc->type = jbvArray;
2410+
break;
2411+
case JB_TSCALAR:
2412+
jc->type = jbvArray | jbvScalar;
2413+
break;
2414+
default:
2415+
elog(ERROR, "invalid jsonb container type: %d",
2416+
jbc->header & JB_TMASK);
2417+
}
2418+
}
2419+
22792420
static void
22802421
jsonbInitContainer(JsonContainerData *jc, JsonbContainer *jbc, int len)
22812422
{
22822423
jc->ops = &jsonbContainerOps;
22832424
JsonContainerDataPtr(jc) = jbc;
22842425
jc->len = len;
2285-
jc->size = jbc->header & JB_CMASK;
2286-
jc->type = jbc->header & JB_FOBJECT ? jbvObject :
2287-
jbc->header & JB_FSCALAR ? jbvArray | jbvScalar :
2288-
jbvArray;
2426+
jsonbInitContainerFromHeader(jc, jbc);
22892427
}
22902428

22912429
static void
@@ -2399,10 +2537,7 @@ jsonbzInitContainer(JsonContainerData *jc, CompressedJsonb *cjb, int len)
23992537

24002538
jc->ops = &jsonbzContainerOps;
24012539
jc->len = len;
2402-
jc->size = jbc->header & JB_CMASK;
2403-
jc->type = jbc->header & JB_FOBJECT ? jbvObject :
2404-
jbc->header & JB_FSCALAR ? jbvArray | jbvScalar :
2405-
jbvArray;
2540+
jsonbInitContainerFromHeader(jc, jbc);
24062541
}
24072542

24082543
static JsonbContainer *
@@ -2470,7 +2605,9 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
24702605
JEntry *children = container->children;
24712606
int count = container->header & JB_CMASK;
24722607
/* Since this is an object, account for *Pairs* of Jentrys */
2473-
char *base_addr = (char *) (children + count * 2);
2608+
bool sorted_values = (container->header & JB_TMASK) == JB_TOBJECT_SORTED;
2609+
char *base_addr = (char *) (children + count * 2) + (sorted_values ? sizeof(uint32) * count : 0);
2610+
uint32 *kvmap = sorted_values ? &container->children[count * 2] : NULL;
24742611
Size base_offset = base_addr - (char *) jb;
24752612
uint32 stopLow = 0,
24762613
stopHigh = count;
@@ -2512,7 +2649,7 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
25122649
if (difference == 0)
25132650
{
25142651
/* Found our key, return corresponding value */
2515-
int index = stopMiddle + count;
2652+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
25162653

25172654
return fillCompressedJsonbValue(cjb, container, index, base_addr,
25182655
getJsonbOffset(container, index),

0 commit comments

Comments
 (0)