Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 03109a5

Browse files
committed
Fix behavior of float aggregates for single Inf or NaN inputs.
When there is just one non-null input value, and it is infinity or NaN, aggregates such as stddev_pop and covar_pop should produce a NaN result, because the calculation is not well-defined. They used to do so, but since we adopted Youngs-Cramer aggregation in commit e954a72, they produced zero instead. That's an oversight, so fix it. Add tests exercising these edge cases. Affected aggregates are var_pop(double precision) stddev_pop(double precision) var_pop(real) stddev_pop(real) regr_sxx(double precision,double precision) regr_syy(double precision,double precision) regr_sxy(double precision,double precision) regr_r2(double precision,double precision) regr_slope(double precision,double precision) regr_intercept(double precision,double precision) covar_pop(double precision,double precision) corr(double precision,double precision) Back-patch to v12 where the behavior change was accidentally introduced. Report and patch by me; thanks to Dean Rasheed for review. Discussion: https://postgr.es/m/353062.1591898766@sss.pgh.pa.us
1 parent d64f1cd commit 03109a5

File tree

3 files changed

+159
-2
lines changed

3 files changed

+159
-2
lines changed

src/backend/utils/adt/float.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2925,6 +2925,17 @@ float8_accum(PG_FUNCTION_ARGS)
29252925
Sxx = get_float8_nan();
29262926
}
29272927
}
2928+
else
2929+
{
2930+
/*
2931+
* At the first input, we normally can leave Sxx as 0. However, if
2932+
* the first input is Inf or NaN, we'd better force Sxx to NaN;
2933+
* otherwise we will falsely report variance zero when there are no
2934+
* more inputs.
2935+
*/
2936+
if (isnan(newval) || isinf(newval))
2937+
Sxx = get_float8_nan();
2938+
}
29282939

29292940
/*
29302941
* If we're invoked as an aggregate, we can cheat and modify our first
@@ -2999,6 +3010,17 @@ float4_accum(PG_FUNCTION_ARGS)
29993010
Sxx = get_float8_nan();
30003011
}
30013012
}
3013+
else
3014+
{
3015+
/*
3016+
* At the first input, we normally can leave Sxx as 0. However, if
3017+
* the first input is Inf or NaN, we'd better force Sxx to NaN;
3018+
* otherwise we will falsely report variance zero when there are no
3019+
* more inputs.
3020+
*/
3021+
if (isnan(newval) || isinf(newval))
3022+
Sxx = get_float8_nan();
3023+
}
30023024

30033025
/*
30043026
* If we're invoked as an aggregate, we can cheat and modify our first
@@ -3225,6 +3247,19 @@ float8_regr_accum(PG_FUNCTION_ARGS)
32253247
Sxy = get_float8_nan();
32263248
}
32273249
}
3250+
else
3251+
{
3252+
/*
3253+
* At the first input, we normally can leave Sxx et al as 0. However,
3254+
* if the first input is Inf or NaN, we'd better force the dependent
3255+
* sums to NaN; otherwise we will falsely report variance zero when
3256+
* there are no more inputs.
3257+
*/
3258+
if (isnan(newvalX) || isinf(newvalX))
3259+
Sxx = Sxy = get_float8_nan();
3260+
if (isnan(newvalY) || isinf(newvalY))
3261+
Syy = Sxy = get_float8_nan();
3262+
}
32283263

32293264
/*
32303265
* If we're invoked as an aggregate, we can cheat and modify our first

src/test/regress/expected/aggregates.out

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,79 @@ SELECT var_samp(b::numeric) FROM aggtest;
127127

128128
-- population variance is defined for a single tuple, sample variance
129129
-- is not
130-
SELECT var_pop(1.0), var_samp(2.0);
130+
SELECT var_pop(1.0::float8), var_samp(2.0::float8);
131+
var_pop | var_samp
132+
---------+----------
133+
0 |
134+
(1 row)
135+
136+
SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8);
137+
stddev_pop | stddev_samp
138+
------------+-------------
139+
0 |
140+
(1 row)
141+
142+
SELECT var_pop('inf'::float8), var_samp('inf'::float8);
143+
var_pop | var_samp
144+
---------+----------
145+
NaN |
146+
(1 row)
147+
148+
SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8);
149+
stddev_pop | stddev_samp
150+
------------+-------------
151+
NaN |
152+
(1 row)
153+
154+
SELECT var_pop('nan'::float8), var_samp('nan'::float8);
155+
var_pop | var_samp
156+
---------+----------
157+
NaN |
158+
(1 row)
159+
160+
SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8);
161+
stddev_pop | stddev_samp
162+
------------+-------------
163+
NaN |
164+
(1 row)
165+
166+
SELECT var_pop(1.0::float4), var_samp(2.0::float4);
167+
var_pop | var_samp
168+
---------+----------
169+
0 |
170+
(1 row)
171+
172+
SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4);
173+
stddev_pop | stddev_samp
174+
------------+-------------
175+
0 |
176+
(1 row)
177+
178+
SELECT var_pop('inf'::float4), var_samp('inf'::float4);
179+
var_pop | var_samp
180+
---------+----------
181+
NaN |
182+
(1 row)
183+
184+
SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4);
185+
stddev_pop | stddev_samp
186+
------------+-------------
187+
NaN |
188+
(1 row)
189+
190+
SELECT var_pop('nan'::float4), var_samp('nan'::float4);
191+
var_pop | var_samp
192+
---------+----------
193+
NaN |
194+
(1 row)
195+
196+
SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4);
197+
stddev_pop | stddev_samp
198+
------------+-------------
199+
NaN |
200+
(1 row)
201+
202+
SELECT var_pop(1.0::numeric), var_samp(2.0::numeric);
131203
var_pop | var_samp
132204
---------+----------
133205
0 |
@@ -139,6 +211,18 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
139211
0 |
140212
(1 row)
141213

214+
SELECT var_pop('nan'::numeric), var_samp('nan'::numeric);
215+
var_pop | var_samp
216+
---------+----------
217+
NaN | NaN
218+
(1 row)
219+
220+
SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric);
221+
stddev_pop | stddev_samp
222+
------------+-------------
223+
NaN | NaN
224+
(1 row)
225+
142226
-- verify correct results for null and NaN inputs
143227
select sum(null::int4) from generate_series(1,3);
144228
sum
@@ -299,6 +383,25 @@ SELECT corr(b, a) FROM aggtest;
299383
0.139634516517873
300384
(1 row)
301385

386+
-- check single-tuple behavior
387+
SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
388+
covar_pop | covar_samp
389+
-----------+------------
390+
0 |
391+
(1 row)
392+
393+
SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
394+
covar_pop | covar_samp
395+
-----------+------------
396+
NaN |
397+
(1 row)
398+
399+
SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
400+
covar_pop | covar_samp
401+
-----------+------------
402+
NaN |
403+
(1 row)
404+
302405
-- test accum and combine functions directly
303406
CREATE TABLE regr_test (x float8, y float8);
304407
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);

src/test/regress/sql/aggregates.sql

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,22 @@ SELECT var_samp(b::numeric) FROM aggtest;
3939

4040
-- population variance is defined for a single tuple, sample variance
4141
-- is not
42-
SELECT var_pop(1.0), var_samp(2.0);
42+
SELECT var_pop(1.0::float8), var_samp(2.0::float8);
43+
SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8);
44+
SELECT var_pop('inf'::float8), var_samp('inf'::float8);
45+
SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8);
46+
SELECT var_pop('nan'::float8), var_samp('nan'::float8);
47+
SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8);
48+
SELECT var_pop(1.0::float4), var_samp(2.0::float4);
49+
SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4);
50+
SELECT var_pop('inf'::float4), var_samp('inf'::float4);
51+
SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4);
52+
SELECT var_pop('nan'::float4), var_samp('nan'::float4);
53+
SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4);
54+
SELECT var_pop(1.0::numeric), var_samp(2.0::numeric);
4355
SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
56+
SELECT var_pop('nan'::numeric), var_samp('nan'::numeric);
57+
SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric);
4458

4559
-- verify correct results for null and NaN inputs
4660
select sum(null::int4) from generate_series(1,3);
@@ -81,6 +95,11 @@ SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
8195
SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
8296
SELECT corr(b, a) FROM aggtest;
8397

98+
-- check single-tuple behavior
99+
SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
100+
SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
101+
SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
102+
84103
-- test accum and combine functions directly
85104
CREATE TABLE regr_test (x float8, y float8);
86105
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);

0 commit comments

Comments
 (0)