Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c4e4422

Browse files
committed
Extend mul_var_short() to 5 and 6-digit inputs.
Commit ca481d3 introduced mul_var_short(), which is used by mul_var() whenever the shorter input has 1-4 NBASE digits and the exact product is requested. As speculated on in that commit, it can be extended to work for more digits in the shorter input. This commit extends it up to 6 NBASE digits (up to 24 decimal digits), for which it also gives a significant speedup. This covers more cases likely to occur in real-world queries, for which using base-NBASE^2 arithmetic provides little benefit. To avoid code bloat and duplication, refactor it a bit using macros and exploiting the fact that some portions of the code are shared between the different cases. Dean Rasheed, reviewed by Joel Jacobson. Discussion: https://postgr.es/m/9d8a4a42-c354-41f3-bbf3-199e1957db97%40app.fastmail.com
1 parent fce7cb6 commit c4e4422

File tree

1 file changed

+123
-52
lines changed

1 file changed

+123
-52
lines changed

src/backend/utils/adt/numeric.c

Lines changed: 123 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -8714,10 +8714,10 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
87148714
}
87158715

87168716
/*
8717-
* If var1 has 1-4 digits and the exact result was requested, delegate to
8717+
* If var1 has 1-6 digits and the exact result was requested, delegate to
87188718
* mul_var_short() which uses a faster direct multiplication algorithm.
87198719
*/
8720-
if (var1ndigits <= 4 && rscale == var1->dscale + var2->dscale)
8720+
if (var1ndigits <= 6 && rscale == var1->dscale + var2->dscale)
87218721
{
87228722
mul_var_short(var1, var2, result);
87238723
return;
@@ -8876,7 +8876,7 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
88768876
/*
88778877
* mul_var_short() -
88788878
*
8879-
* Special-case multiplication function used when var1 has 1-4 digits, var2
8879+
* Special-case multiplication function used when var1 has 1-6 digits, var2
88808880
* has at least as many digits as var1, and the exact product var1 * var2 is
88818881
* requested.
88828882
*/
@@ -8898,7 +8898,7 @@ mul_var_short(const NumericVar *var1, const NumericVar *var2,
88988898

88998899
/* Check preconditions */
89008900
Assert(var1ndigits >= 1);
8901-
Assert(var1ndigits <= 4);
8901+
Assert(var1ndigits <= 6);
89028902
Assert(var2ndigits >= var1ndigits);
89038903

89048904
/*
@@ -8925,6 +8925,13 @@ mul_var_short(const NumericVar *var1, const NumericVar *var2,
89258925
* carry up as we go. The i'th result digit consists of the sum of the
89268926
* products var1digits[i1] * var2digits[i2] for which i = i1 + i2 + 1.
89278927
*/
8928+
#define PRODSUM1(v1,i1,v2,i2) ((v1)[(i1)] * (v2)[(i2)])
8929+
#define PRODSUM2(v1,i1,v2,i2) (PRODSUM1(v1,i1,v2,i2) + (v1)[(i1)+1] * (v2)[(i2)-1])
8930+
#define PRODSUM3(v1,i1,v2,i2) (PRODSUM2(v1,i1,v2,i2) + (v1)[(i1)+2] * (v2)[(i2)-2])
8931+
#define PRODSUM4(v1,i1,v2,i2) (PRODSUM3(v1,i1,v2,i2) + (v1)[(i1)+3] * (v2)[(i2)-3])
8932+
#define PRODSUM5(v1,i1,v2,i2) (PRODSUM4(v1,i1,v2,i2) + (v1)[(i1)+4] * (v2)[(i2)-4])
8933+
#define PRODSUM6(v1,i1,v2,i2) (PRODSUM5(v1,i1,v2,i2) + (v1)[(i1)+5] * (v2)[(i2)-5])
8934+
89288935
switch (var1ndigits)
89298936
{
89308937
case 1:
@@ -8936,9 +8943,9 @@ mul_var_short(const NumericVar *var1, const NumericVar *var2,
89368943
* ----------
89378944
*/
89388945
carry = 0;
8939-
for (int i = res_ndigits - 2; i >= 0; i--)
8946+
for (int i = var2ndigits - 1; i >= 0; i--)
89408947
{
8941-
term = (uint32) var1digits[0] * var2digits[i] + carry;
8948+
term = PRODSUM1(var1digits, 0, var2digits, i) + carry;
89428949
res_digits[i + 1] = (NumericDigit) (term % NBASE);
89438950
carry = term / NBASE;
89448951
}
@@ -8954,23 +8961,17 @@ mul_var_short(const NumericVar *var1, const NumericVar *var2,
89548961
* ----------
89558962
*/
89568963
/* last result digit and carry */
8957-
term = (uint32) var1digits[1] * var2digits[res_ndigits - 3];
8964+
term = PRODSUM1(var1digits, 1, var2digits, var2ndigits - 1);
89588965
res_digits[res_ndigits - 1] = (NumericDigit) (term % NBASE);
89598966
carry = term / NBASE;
89608967

89618968
/* remaining digits, except for the first two */
8962-
for (int i = res_ndigits - 3; i >= 1; i--)
8969+
for (int i = var2ndigits - 1; i >= 1; i--)
89638970
{
8964-
term = (uint32) var1digits[0] * var2digits[i] +
8965-
(uint32) var1digits[1] * var2digits[i - 1] + carry;
8971+
term = PRODSUM2(var1digits, 0, var2digits, i) + carry;
89668972
res_digits[i + 1] = (NumericDigit) (term % NBASE);
89678973
carry = term / NBASE;
89688974
}
8969-
8970-
/* first two digits */
8971-
term = (uint32) var1digits[0] * var2digits[0] + carry;
8972-
res_digits[1] = (NumericDigit) (term % NBASE);
8973-
res_digits[0] = (NumericDigit) (term / NBASE);
89748975
break;
89758976

89768977
case 3:
@@ -8982,34 +8983,21 @@ mul_var_short(const NumericVar *var1, const NumericVar *var2,
89828983
* ----------
89838984
*/
89848985
/* last two result digits */
8985-
term = (uint32) var1digits[2] * var2digits[res_ndigits - 4];
8986+
term = PRODSUM1(var1digits, 2, var2digits, var2ndigits - 1);
89868987
res_digits[res_ndigits - 1] = (NumericDigit) (term % NBASE);
89878988
carry = term / NBASE;
89888989

8989-
term = (uint32) var1digits[1] * var2digits[res_ndigits - 4] +
8990-
(uint32) var1digits[2] * var2digits[res_ndigits - 5] + carry;
8990+
term = PRODSUM2(var1digits, 1, var2digits, var2ndigits - 1) + carry;
89918991
res_digits[res_ndigits - 2] = (NumericDigit) (term % NBASE);
89928992
carry = term / NBASE;
89938993

89948994
/* remaining digits, except for the first three */
8995-
for (int i = res_ndigits - 4; i >= 2; i--)
8995+
for (int i = var2ndigits - 1; i >= 2; i--)
89968996
{
8997-
term = (uint32) var1digits[0] * var2digits[i] +
8998-
(uint32) var1digits[1] * var2digits[i - 1] +
8999-
(uint32) var1digits[2] * var2digits[i - 2] + carry;
8997+
term = PRODSUM3(var1digits, 0, var2digits, i) + carry;
90008998
res_digits[i + 1] = (NumericDigit) (term % NBASE);
90018999
carry = term / NBASE;
90029000
}
9003-
9004-
/* first three digits */
9005-
term = (uint32) var1digits[0] * var2digits[1] +
9006-
(uint32) var1digits[1] * var2digits[0] + carry;
9007-
res_digits[2] = (NumericDigit) (term % NBASE);
9008-
carry = term / NBASE;
9009-
9010-
term = (uint32) var1digits[0] * var2digits[0] + carry;
9011-
res_digits[1] = (NumericDigit) (term % NBASE);
9012-
res_digits[0] = (NumericDigit) (term / NBASE);
90139001
break;
90149002

90159003
case 4:
@@ -9021,45 +9009,128 @@ mul_var_short(const NumericVar *var1, const NumericVar *var2,
90219009
* ----------
90229010
*/
90239011
/* last three result digits */
9024-
term = (uint32) var1digits[3] * var2digits[res_ndigits - 5];
9012+
term = PRODSUM1(var1digits, 3, var2digits, var2ndigits - 1);
90259013
res_digits[res_ndigits - 1] = (NumericDigit) (term % NBASE);
90269014
carry = term / NBASE;
90279015

9028-
term = (uint32) var1digits[2] * var2digits[res_ndigits - 5] +
9029-
(uint32) var1digits[3] * var2digits[res_ndigits - 6] + carry;
9016+
term = PRODSUM2(var1digits, 2, var2digits, var2ndigits - 1) + carry;
90309017
res_digits[res_ndigits - 2] = (NumericDigit) (term % NBASE);
90319018
carry = term / NBASE;
90329019

9033-
term = (uint32) var1digits[1] * var2digits[res_ndigits - 5] +
9034-
(uint32) var1digits[2] * var2digits[res_ndigits - 6] +
9035-
(uint32) var1digits[3] * var2digits[res_ndigits - 7] + carry;
9020+
term = PRODSUM3(var1digits, 1, var2digits, var2ndigits - 1) + carry;
90369021
res_digits[res_ndigits - 3] = (NumericDigit) (term % NBASE);
90379022
carry = term / NBASE;
90389023

90399024
/* remaining digits, except for the first four */
9040-
for (int i = res_ndigits - 5; i >= 3; i--)
9025+
for (int i = var2ndigits - 1; i >= 3; i--)
90419026
{
9042-
term = (uint32) var1digits[0] * var2digits[i] +
9043-
(uint32) var1digits[1] * var2digits[i - 1] +
9044-
(uint32) var1digits[2] * var2digits[i - 2] +
9045-
(uint32) var1digits[3] * var2digits[i - 3] + carry;
9027+
term = PRODSUM4(var1digits, 0, var2digits, i) + carry;
90469028
res_digits[i + 1] = (NumericDigit) (term % NBASE);
90479029
carry = term / NBASE;
90489030
}
9031+
break;
90499032

9050-
/* first four digits */
9051-
term = (uint32) var1digits[0] * var2digits[2] +
9052-
(uint32) var1digits[1] * var2digits[1] +
9053-
(uint32) var1digits[2] * var2digits[0] + carry;
9054-
res_digits[3] = (NumericDigit) (term % NBASE);
9033+
case 5:
9034+
/* ---------
9035+
* 5-digit case:
9036+
* var1ndigits = 5
9037+
* var2ndigits >= 5
9038+
* res_ndigits = var2ndigits + 5
9039+
* ----------
9040+
*/
9041+
/* last four result digits */
9042+
term = PRODSUM1(var1digits, 4, var2digits, var2ndigits - 1);
9043+
res_digits[res_ndigits - 1] = (NumericDigit) (term % NBASE);
90559044
carry = term / NBASE;
90569045

9057-
term = (uint32) var1digits[0] * var2digits[1] +
9058-
(uint32) var1digits[1] * var2digits[0] + carry;
9059-
res_digits[2] = (NumericDigit) (term % NBASE);
9046+
term = PRODSUM2(var1digits, 3, var2digits, var2ndigits - 1) + carry;
9047+
res_digits[res_ndigits - 2] = (NumericDigit) (term % NBASE);
9048+
carry = term / NBASE;
9049+
9050+
term = PRODSUM3(var1digits, 2, var2digits, var2ndigits - 1) + carry;
9051+
res_digits[res_ndigits - 3] = (NumericDigit) (term % NBASE);
90609052
carry = term / NBASE;
90619053

9062-
term = (uint32) var1digits[0] * var2digits[0] + carry;
9054+
term = PRODSUM4(var1digits, 1, var2digits, var2ndigits - 1) + carry;
9055+
res_digits[res_ndigits - 4] = (NumericDigit) (term % NBASE);
9056+
carry = term / NBASE;
9057+
9058+
/* remaining digits, except for the first five */
9059+
for (int i = var2ndigits - 1; i >= 4; i--)
9060+
{
9061+
term = PRODSUM5(var1digits, 0, var2digits, i) + carry;
9062+
res_digits[i + 1] = (NumericDigit) (term % NBASE);
9063+
carry = term / NBASE;
9064+
}
9065+
break;
9066+
9067+
case 6:
9068+
/* ---------
9069+
* 6-digit case:
9070+
* var1ndigits = 6
9071+
* var2ndigits >= 6
9072+
* res_ndigits = var2ndigits + 6
9073+
* ----------
9074+
*/
9075+
/* last five result digits */
9076+
term = PRODSUM1(var1digits, 5, var2digits, var2ndigits - 1);
9077+
res_digits[res_ndigits - 1] = (NumericDigit) (term % NBASE);
9078+
carry = term / NBASE;
9079+
9080+
term = PRODSUM2(var1digits, 4, var2digits, var2ndigits - 1) + carry;
9081+
res_digits[res_ndigits - 2] = (NumericDigit) (term % NBASE);
9082+
carry = term / NBASE;
9083+
9084+
term = PRODSUM3(var1digits, 3, var2digits, var2ndigits - 1) + carry;
9085+
res_digits[res_ndigits - 3] = (NumericDigit) (term % NBASE);
9086+
carry = term / NBASE;
9087+
9088+
term = PRODSUM4(var1digits, 2, var2digits, var2ndigits - 1) + carry;
9089+
res_digits[res_ndigits - 4] = (NumericDigit) (term % NBASE);
9090+
carry = term / NBASE;
9091+
9092+
term = PRODSUM5(var1digits, 1, var2digits, var2ndigits - 1) + carry;
9093+
res_digits[res_ndigits - 5] = (NumericDigit) (term % NBASE);
9094+
carry = term / NBASE;
9095+
9096+
/* remaining digits, except for the first six */
9097+
for (int i = var2ndigits - 1; i >= 5; i--)
9098+
{
9099+
term = PRODSUM6(var1digits, 0, var2digits, i) + carry;
9100+
res_digits[i + 1] = (NumericDigit) (term % NBASE);
9101+
carry = term / NBASE;
9102+
}
9103+
break;
9104+
}
9105+
9106+
/*
9107+
* Finally, for var1ndigits > 1, compute the remaining var1ndigits most
9108+
* significant result digits.
9109+
*/
9110+
switch (var1ndigits)
9111+
{
9112+
case 6:
9113+
term = PRODSUM5(var1digits, 0, var2digits, 4) + carry;
9114+
res_digits[5] = (NumericDigit) (term % NBASE);
9115+
carry = term / NBASE;
9116+
/* FALLTHROUGH */
9117+
case 5:
9118+
term = PRODSUM4(var1digits, 0, var2digits, 3) + carry;
9119+
res_digits[4] = (NumericDigit) (term % NBASE);
9120+
carry = term / NBASE;
9121+
/* FALLTHROUGH */
9122+
case 4:
9123+
term = PRODSUM3(var1digits, 0, var2digits, 2) + carry;
9124+
res_digits[3] = (NumericDigit) (term % NBASE);
9125+
carry = term / NBASE;
9126+
/* FALLTHROUGH */
9127+
case 3:
9128+
term = PRODSUM2(var1digits, 0, var2digits, 1) + carry;
9129+
res_digits[2] = (NumericDigit) (term % NBASE);
9130+
carry = term / NBASE;
9131+
/* FALLTHROUGH */
9132+
case 2:
9133+
term = PRODSUM1(var1digits, 0, var2digits, 0) + carry;
90639134
res_digits[1] = (NumericDigit) (term % NBASE);
90649135
res_digits[0] = (NumericDigit) (term / NBASE);
90659136
break;

0 commit comments

Comments
 (0)