Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e502150

Browse files
committed
Allow Memoize to operate in binary comparison mode
Memoize would always use the hash equality operator for the cache key types to determine if the current set of parameters were the same as some previously cached set. Certain types such as floating points where -0.0 and +0.0 differ in their binary representation but are classed as equal by the hash equality operator may cause problems as unless the join uses the same operator it's possible that whichever join operator is being used would be able to distinguish the two values. In which case we may accidentally return in the incorrect rows out of the cache. To fix this here we add a binary mode to Memoize to allow it to the current set of parameters to previously cached values by comparing bit-by-bit rather than logically using the hash equality operator. This binary mode is always used for LATERAL joins and it's used for normal joins when any of the join operators are not hashable. Reported-by: Tom Lane Author: David Rowley Discussion: https://postgr.es/m/3004308.1632952496@sss.pgh.pa.us Backpatch-through: 14, where Memoize was added
1 parent 1922d7c commit e502150

File tree

19 files changed

+345
-39
lines changed

19 files changed

+345
-39
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

+2-1
Original file line numberDiff line numberDiff line change
@@ -2247,6 +2247,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
22472247
Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
22482248
-> Memoize
22492249
Cache Key: t1.c2
2250+
Cache Mode: binary
22502251
-> Subquery Scan on q
22512252
-> HashAggregate
22522253
Output: t2.c1, t3.c1
@@ -2255,7 +2256,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
22552256
Output: t2.c1, t3.c1
22562257
Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
22572258
Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
2258-
(16 rows)
2259+
(17 rows)
22592260

22602261
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
22612262
C 1

src/backend/commands/explain.c

+3
Original file line numberDiff line numberDiff line change
@@ -3127,11 +3127,14 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
31273127
if (es->format != EXPLAIN_FORMAT_TEXT)
31283128
{
31293129
ExplainPropertyText("Cache Key", keystr.data, es);
3130+
ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
31303131
}
31313132
else
31323133
{
31333134
ExplainIndentText(es);
31343135
appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
3136+
ExplainIndentText(es);
3137+
appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical");
31353138
}
31363139

31373140
pfree(keystr.data);

src/backend/executor/nodeMemoize.c

+76-16
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#include "executor/nodeMemoize.h"
7272
#include "lib/ilist.h"
7373
#include "miscadmin.h"
74+
#include "utils/datum.h"
7475
#include "utils/lsyscache.h"
7576

7677
/* States of the ExecMemoize state machine */
@@ -131,7 +132,7 @@ typedef struct MemoizeEntry
131132

132133
static uint32 MemoizeHash_hash(struct memoize_hash *tb,
133134
const MemoizeKey *key);
134-
static int MemoizeHash_equal(struct memoize_hash *tb,
135+
static bool MemoizeHash_equal(struct memoize_hash *tb,
135136
const MemoizeKey *params1,
136137
const MemoizeKey *params2);
137138

@@ -140,7 +141,7 @@ static int MemoizeHash_equal(struct memoize_hash *tb,
140141
#define SH_KEY_TYPE MemoizeKey *
141142
#define SH_KEY key
142143
#define SH_HASH_KEY(tb, key) MemoizeHash_hash(tb, key)
143-
#define SH_EQUAL(tb, a, b) (MemoizeHash_equal(tb, a, b) == 0)
144+
#define SH_EQUAL(tb, a, b) MemoizeHash_equal(tb, a, b)
144145
#define SH_SCOPE static inline
145146
#define SH_STORE_HASH
146147
#define SH_GET_HASH(tb, a) a->hash
@@ -160,21 +161,45 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
160161
TupleTableSlot *pslot = mstate->probeslot;
161162
uint32 hashkey = 0;
162163
int numkeys = mstate->nkeys;
163-
FmgrInfo *hashfunctions = mstate->hashfunctions;
164-
Oid *collations = mstate->collations;
165164

166-
for (int i = 0; i < numkeys; i++)
165+
if (mstate->binary_mode)
167166
{
168-
/* rotate hashkey left 1 bit at each step */
169-
hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
167+
for (int i = 0; i < numkeys; i++)
168+
{
169+
/* rotate hashkey left 1 bit at each step */
170+
hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
171+
172+
if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */
173+
{
174+
FormData_pg_attribute *attr;
175+
uint32 hkey;
176+
177+
attr = &pslot->tts_tupleDescriptor->attrs[i];
178+
179+
hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval, attr->attlen);
180+
181+
hashkey ^= hkey;
182+
}
183+
}
184+
}
185+
else
186+
{
187+
FmgrInfo *hashfunctions = mstate->hashfunctions;
188+
Oid *collations = mstate->collations;
170189

171-
if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */
190+
for (int i = 0; i < numkeys; i++)
172191
{
173-
uint32 hkey;
192+
/* rotate hashkey left 1 bit at each step */
193+
hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
194+
195+
if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */
196+
{
197+
uint32 hkey;
174198

175-
hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
176-
collations[i], pslot->tts_values[i]));
177-
hashkey ^= hkey;
199+
hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
200+
collations[i], pslot->tts_values[i]));
201+
hashkey ^= hkey;
202+
}
178203
}
179204
}
180205

@@ -187,7 +212,7 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
187212
* table lookup. 'key2' is never used. Instead the MemoizeState's
188213
* probeslot is always populated with details of what's being looked up.
189214
*/
190-
static int
215+
static bool
191216
MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
192217
const MemoizeKey *key2)
193218
{
@@ -199,9 +224,38 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
199224
/* probeslot should have already been prepared by prepare_probe_slot() */
200225
ExecStoreMinimalTuple(key1->params, tslot, false);
201226

202-
econtext->ecxt_innertuple = tslot;
203-
econtext->ecxt_outertuple = pslot;
204-
return !ExecQualAndReset(mstate->cache_eq_expr, econtext);
227+
if (mstate->binary_mode)
228+
{
229+
int numkeys = mstate->nkeys;
230+
231+
slot_getallattrs(tslot);
232+
slot_getallattrs(pslot);
233+
234+
for (int i = 0; i < numkeys; i++)
235+
{
236+
FormData_pg_attribute *attr;
237+
238+
if (tslot->tts_isnull[i] != pslot->tts_isnull[i])
239+
return false;
240+
241+
/* both NULL? they're equal */
242+
if (tslot->tts_isnull[i])
243+
continue;
244+
245+
/* perform binary comparison on the two datums */
246+
attr = &tslot->tts_tupleDescriptor->attrs[i];
247+
if (!datum_image_eq(tslot->tts_values[i], pslot->tts_values[i],
248+
attr->attbyval, attr->attlen))
249+
return false;
250+
}
251+
return true;
252+
}
253+
else
254+
{
255+
econtext->ecxt_innertuple = tslot;
256+
econtext->ecxt_outertuple = pslot;
257+
return ExecQualAndReset(mstate->cache_eq_expr, econtext);
258+
}
205259
}
206260

207261
/*
@@ -926,6 +980,12 @@ ExecInitMemoize(Memoize *node, EState *estate, int eflags)
926980
*/
927981
mstate->singlerow = node->singlerow;
928982

983+
/*
984+
* Record if the cache keys should be compared bit by bit, or logically
985+
* using the type's hash equality operator
986+
*/
987+
mstate->binary_mode = node->binary_mode;
988+
929989
/* Zero the statistics counters */
930990
memset(&mstate->stats, 0, sizeof(MemoizeInstrumentation));
931991

src/backend/nodes/copyfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,7 @@ _copyMemoize(const Memoize *from)
971971
COPY_POINTER_FIELD(collations, sizeof(Oid) * from->numKeys);
972972
COPY_NODE_FIELD(param_exprs);
973973
COPY_SCALAR_FIELD(singlerow);
974+
COPY_SCALAR_FIELD(binary_mode);
974975
COPY_SCALAR_FIELD(est_entries);
975976

976977
return newnode;

src/backend/nodes/outfuncs.c

+2
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,7 @@ _outMemoize(StringInfo str, const Memoize *node)
866866
WRITE_OID_ARRAY(collations, node->numKeys);
867867
WRITE_NODE_FIELD(param_exprs);
868868
WRITE_BOOL_FIELD(singlerow);
869+
WRITE_BOOL_FIELD(binary_mode);
869870
WRITE_UINT_FIELD(est_entries);
870871
}
871872

@@ -1966,6 +1967,7 @@ _outMemoizePath(StringInfo str, const MemoizePath *node)
19661967
WRITE_NODE_FIELD(hash_operators);
19671968
WRITE_NODE_FIELD(param_exprs);
19681969
WRITE_BOOL_FIELD(singlerow);
1970+
WRITE_BOOL_FIELD(binary_mode);
19691971
WRITE_FLOAT_FIELD(calls, "%.0f");
19701972
WRITE_UINT_FIELD(est_entries);
19711973
}

src/backend/nodes/readfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -2230,6 +2230,7 @@ _readMemoize(void)
22302230
READ_OID_ARRAY(collations, local_node->numKeys);
22312231
READ_NODE_FIELD(param_exprs);
22322232
READ_BOOL_FIELD(singlerow);
2233+
READ_BOOL_FIELD(binary_mode);
22332234
READ_UINT_FIELD(est_entries);
22342235

22352236
READ_DONE();

src/backend/optimizer/path/joinpath.c

+34-4
Original file line numberDiff line numberDiff line change
@@ -371,19 +371,21 @@ allow_star_schema_join(PlannerInfo *root,
371371
* Returns true the hashing is possible, otherwise return false.
372372
*
373373
* Additionally we also collect the outer exprs and the hash operators for
374-
* each parameter to innerrel. These set in 'param_exprs' and 'operators'
375-
* when we return true.
374+
* each parameter to innerrel. These set in 'param_exprs', 'operators' and
375+
* 'binary_mode' when we return true.
376376
*/
377377
static bool
378378
paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
379379
RelOptInfo *outerrel, RelOptInfo *innerrel,
380-
List **param_exprs, List **operators)
380+
List **param_exprs, List **operators,
381+
bool *binary_mode)
381382

382383
{
383384
ListCell *lc;
384385

385386
*param_exprs = NIL;
386387
*operators = NIL;
388+
*binary_mode = false;
387389

388390
if (param_info != NULL)
389391
{
@@ -431,6 +433,20 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
431433

432434
*operators = lappend_oid(*operators, hasheqoperator);
433435
*param_exprs = lappend(*param_exprs, expr);
436+
437+
/*
438+
* When the join operator is not hashable then it's possible that
439+
* the operator will be able to distinguish something that the
440+
* hash equality operator could not. For example with floating
441+
* point types -0.0 and +0.0 are classed as equal by the hash
442+
* function and equality function, but some other operator may be
443+
* able to tell those values apart. This means that we must put
444+
* memoize into binary comparison mode so that it does bit-by-bit
445+
* comparisons rather than a "logical" comparison as it would
446+
* using the hash equality operator.
447+
*/
448+
if (!OidIsValid(rinfo->hashjoinoperator))
449+
*binary_mode = true;
434450
}
435451
}
436452

@@ -461,6 +477,17 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
461477

462478
*operators = lappend_oid(*operators, typentry->eq_opr);
463479
*param_exprs = lappend(*param_exprs, expr);
480+
481+
/*
482+
* We must go into binary mode as we don't have too much of an idea of
483+
* how these lateral Vars are being used. See comment above when we
484+
* set *binary_mode for the non-lateral Var case. This could be
485+
* relaxed a bit if we had the RestrictInfos and knew the operators
486+
* being used, however for cases like Vars that are arguments to
487+
* functions we must operate in binary mode as we don't have
488+
* visibility into what the function is doing with the Vars.
489+
*/
490+
*binary_mode = true;
464491
}
465492

466493
/* We're okay to use memoize */
@@ -481,6 +508,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
481508
List *param_exprs;
482509
List *hash_operators;
483510
ListCell *lc;
511+
bool binary_mode;
484512

485513
/* Obviously not if it's disabled */
486514
if (!enable_memoize)
@@ -572,14 +600,16 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
572600
outerrel,
573601
innerrel,
574602
&param_exprs,
575-
&hash_operators))
603+
&hash_operators,
604+
&binary_mode))
576605
{
577606
return (Path *) create_memoize_path(root,
578607
innerrel,
579608
inner_path,
580609
param_exprs,
581610
hash_operators,
582611
extra->inner_unique,
612+
binary_mode,
583613
outer_path->parent->rows);
584614
}
585615

src/backend/optimizer/plan/createplan.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,8 @@ static Sort *make_sort_from_groupcols(List *groupcls,
279279
static Material *make_material(Plan *lefttree);
280280
static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
281281
Oid *collations, List *param_exprs,
282-
bool singlerow, uint32 est_entries);
282+
bool singlerow, bool binary_mode,
283+
uint32 est_entries);
283284
static WindowAgg *make_windowagg(List *tlist, Index winref,
284285
int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
285286
int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1617,7 +1618,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
16171618
}
16181619

16191620
plan = make_memoize(subplan, operators, collations, param_exprs,
1620-
best_path->singlerow, best_path->est_entries);
1621+
best_path->singlerow, best_path->binary_mode,
1622+
best_path->est_entries);
16211623

16221624
copy_generic_path_info(&plan->plan, (Path *) best_path);
16231625

@@ -6417,7 +6419,8 @@ materialize_finished_plan(Plan *subplan)
64176419

64186420
static Memoize *
64196421
make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
6420-
List *param_exprs, bool singlerow, uint32 est_entries)
6422+
List *param_exprs, bool singlerow, bool binary_mode,
6423+
uint32 est_entries)
64216424
{
64226425
Memoize *node = makeNode(Memoize);
64236426
Plan *plan = &node->plan;
@@ -6432,6 +6435,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
64326435
node->collations = collations;
64336436
node->param_exprs = param_exprs;
64346437
node->singlerow = singlerow;
6438+
node->binary_mode = binary_mode;
64356439
node->est_entries = est_entries;
64366440

64376441
return node;

src/backend/optimizer/util/pathnode.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -1583,7 +1583,7 @@ create_material_path(RelOptInfo *rel, Path *subpath)
15831583
MemoizePath *
15841584
create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
15851585
List *param_exprs, List *hash_operators,
1586-
bool singlerow, double calls)
1586+
bool singlerow, bool binary_mode, double calls)
15871587
{
15881588
MemoizePath *pathnode = makeNode(MemoizePath);
15891589

@@ -1603,6 +1603,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
16031603
pathnode->hash_operators = hash_operators;
16041604
pathnode->param_exprs = param_exprs;
16051605
pathnode->singlerow = singlerow;
1606+
pathnode->binary_mode = binary_mode;
16061607
pathnode->calls = calls;
16071608

16081609
/*
@@ -3942,6 +3943,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
39423943
mpath->param_exprs,
39433944
mpath->hash_operators,
39443945
mpath->singlerow,
3946+
mpath->binary_mode,
39453947
mpath->calls);
39463948
}
39473949
default:

0 commit comments

Comments
 (0)