Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2146d8c

Browse files
committed
Add integer aggregator to /contrib.
mlw
1 parent e105f9a commit 2146d8c

File tree

5 files changed

+403
-2
lines changed

5 files changed

+403
-2
lines changed

contrib/README

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,14 @@ fuzzystrmatch -
7171
Levenshtein, metaphone, and soundex fuzzy string matching
7272
by Joe Conway <joseph.conway@home.com>, Joel Burton <jburton@scw.org>
7373

74+
intagg -
75+
Integer aggregator
76+
by mlw <markw@mohawksoft.com>
77+
78+
7479
intarray -
7580
Index support for arrays of int4, using GiST
76-
by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov
77-
<oleg@sai.msu.su>.
81+
by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov <oleg@sai.msu.su>
7882

7983
ipc_check -
8084
Simple test script to help in configuring IPC.

contrib/intagg/Makefile

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#############################################
2+
# Makefile for integer aggregator
3+
# Copyright (C) 2001 Digital Music Network.
4+
# by Mark L. Woodward
5+
#
6+
subdir = contrib/intagg
7+
top_builddir = ../..
8+
include $(top_builddir)/src/Makefile.global
9+
10+
NAME=int_aggregate
11+
SONAME = $(NAME)$(DLSUFFIX)
12+
MODULES = int_aggregate
13+
DATA_built = int_aggregate.so
14+
DOCS = README.int_aggrigate
15+
SQLS=int_aggregate.sql
16+
17+
include $(top_srcdir)/contrib/contrib-global.mk
18+
19+
%.sql: %.sql.in
20+
sed 's,MODULE_FILENAME,$$libdir/$(NAME),g' $< >$@
21+
22+
all : $(SONAME) $(SQLS)
23+
24+
25+
install : all
26+
$(INSTALL_SHLIB) $(SONAME) $(DESTDIR)$(pkglibdir)
27+
28+
29+
clean :
30+
rm -f $(SONAME)
31+
rm -f $(SQLS)

contrib/intagg/README.int_aggrigate

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
Integer aggregator/enumerator.
2+
3+
Many database systems have the notion of a one to many table.
4+
5+
A one to many table usually sits between two indexed tables,
6+
as:
7+
8+
create table one_to_many(left int, right int) ;
9+
10+
And it is used like this:
11+
12+
SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right)
13+
WHERE one_to_many.left = item;
14+
15+
This will return all the items in the right hand table for an entry
16+
in the left hand table. This is a very common construct in SQL.
17+
18+
Now, this methodology can be cumbersome with a very large number of
19+
entries in the one_to_many table. Depending on the order in which
20+
data was entered, a join like this could result in an index scan
21+
and a fetch for each right hand entry in the table for a particular
22+
left hand entry.
23+
24+
If you have a very dynamic system, there is not much you can do.
25+
However, if you have some data which is fairly static, you can
26+
create a summary table with the aggregator.
27+
28+
CREATE TABLE summary as SELECT left, int_array_aggregate(right)
29+
AS right FROM one_to_many GROUP BY left;
30+
31+
This will create a table with one row per left item, and an array
32+
of right items. Now this is pretty useless without some way of using
33+
the array, thats why there is an array enumerator.
34+
35+
SELECT left, int_array_enum(right) FROM summary WHERE left = item;
36+
37+
The above query using int_array_enum, produces the same results as:
38+
39+
SELECT left, right FROM one_to_many WHERE left = item;
40+
41+
The difference is that the query against the summary table has to get
42+
only one row from the table, where as the query against "one_to_many"
43+
must index scan and fetch a row for each entry.
44+
45+
On our system, an EXPLAIN shows a query with a cost of 8488 gets reduced
46+
to a cost of 329. The query is a join between the one_to_many table,
47+
48+
select right, count(right) from
49+
(
50+
select left, int_array_enum(right) as right from summary join
51+
(select left from left_table where left = item) as lefts
52+
ON (summary.left = lefts.left )
53+
) as list group by right order by count desc ;
54+
55+

contrib/intagg/int_aggregate.c

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
/*
2+
* Integer array aggregator / enumerator
3+
*
4+
* Mark L. Woodward
5+
* DMN Digital Music Network.
6+
* www.dmn.com
7+
*
8+
* Copyright (C) Digital Music Network
9+
* December 20, 2001
10+
*
11+
* This file is the property of the Digital Music Network (DMN).
12+
* It is being made available to users of the PostgreSQL system
13+
* under the BSD license.
14+
*
15+
*/
16+
#include "postgres.h"
17+
18+
#include <ctype.h>
19+
#include <stdio.h>
20+
#include <sys/types.h>
21+
#include <string.h>
22+
#include "postgres.h"
23+
#include "access/heapam.h"
24+
#include "catalog/catname.h"
25+
#include "catalog/indexing.h"
26+
#include "catalog/pg_proc.h"
27+
#include "executor/executor.h"
28+
#include "utils/fcache.h"
29+
#include "utils/sets.h"
30+
#include "utils/syscache.h"
31+
#include "access/tupmacs.h"
32+
#include "access/xact.h"
33+
#include "fmgr.h"
34+
#include "miscadmin.h"
35+
#include "utils/array.h"
36+
#include "utils/builtins.h"
37+
#include "utils/memutils.h"
38+
#include "utils/lsyscache.h"
39+
40+
41+
/* This is actually a postgres version of a one dimentional array */
42+
43+
typedef struct agg
44+
{
45+
ArrayType a;
46+
int items;
47+
int lower;
48+
int4 array[1];
49+
}PGARRAY;
50+
51+
/* This is used to keep track of our position during enumeration */
52+
typedef struct callContext
53+
{
54+
PGARRAY *p;
55+
int num;
56+
int flags;
57+
}CTX;
58+
59+
#define TOASTED 1
60+
#define START_NUM 8
61+
#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + ((n-1)*sizeof(int4)))
62+
63+
static PGARRAY * GetPGArray(int4 state, int fAdd);
64+
static PGARRAY *ShrinkPGArray(PGARRAY *p);
65+
66+
Datum int_agg_state(PG_FUNCTION_ARGS);
67+
Datum int_agg_final_count(PG_FUNCTION_ARGS);
68+
Datum int_agg_final_array(PG_FUNCTION_ARGS);
69+
Datum int_enum(PG_FUNCTION_ARGS);
70+
71+
PG_FUNCTION_INFO_V1(int_agg_state);
72+
PG_FUNCTION_INFO_V1(int_agg_final_count);
73+
PG_FUNCTION_INFO_V1(int_agg_final_array);
74+
PG_FUNCTION_INFO_V1(int_enum);
75+
76+
/*
77+
* Manage the aggregation state of the array
78+
* You need to specify the correct memory context, or it will vanish!
79+
*/
80+
static PGARRAY * GetPGArray(int4 state, int fAdd)
81+
{
82+
PGARRAY *p = (PGARRAY *) state;
83+
84+
if(!state)
85+
{
86+
/* New array */
87+
int cb = PGARRAY_SIZE(START_NUM);
88+
89+
p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb);
90+
91+
if(!p)
92+
{
93+
elog(ERROR,"Integer aggregator, cant allocate TopTransactionContext memory");
94+
return 0;
95+
}
96+
97+
p->a.size = cb;
98+
p->a.ndim= 0;
99+
p->a.flags = 0;
100+
p->items = 0;
101+
p->lower= START_NUM;
102+
}
103+
else if(fAdd)
104+
{ /* Ensure array has space */
105+
if(p->items >= p->lower)
106+
{
107+
PGARRAY *pn;
108+
int n = p->lower + p->lower;
109+
int cbNew = PGARRAY_SIZE(n);
110+
111+
pn = (PGARRAY *) repalloc(p, cbNew);
112+
113+
if(!pn)
114+
{ /* Realloc failed! Reallocate new block. */
115+
pn = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cbNew);
116+
if(!pn)
117+
{
118+
elog(ERROR, "Integer aggregator, REALLY REALLY can't alloc memory");
119+
return (PGARRAY *) NULL;
120+
}
121+
memcpy(pn, p, p->a.size);
122+
pfree(p);
123+
}
124+
pn->a.size = cbNew;
125+
pn->lower = n;
126+
return pn;
127+
}
128+
}
129+
return p;
130+
}
131+
132+
/* Shrinks the array to its actual size and moves it into the standard
133+
* memory allocation context, frees working memory */
134+
static PGARRAY *ShrinkPGArray(PGARRAY *p)
135+
{
136+
PGARRAY *pnew=NULL;
137+
if(p)
138+
{
139+
/* get target size */
140+
int cb = PGARRAY_SIZE(p->items);
141+
142+
/* use current transaction context */
143+
pnew = palloc(cb);
144+
145+
if(pnew)
146+
{
147+
/* Fix up the fields in the new structure, so Postgres understands */
148+
memcpy(pnew, p, cb);
149+
pnew->a.size = cb;
150+
pnew->a.ndim=1;
151+
pnew->a.flags = 0;
152+
pnew->lower = 0;
153+
}
154+
else
155+
{
156+
elog(ERROR, "Integer aggregator, can't allocate memory");
157+
}
158+
pfree(p);
159+
}
160+
return pnew;
161+
}
162+
163+
/* Called for each iteration during an aggregate function */
164+
Datum int_agg_state(PG_FUNCTION_ARGS)
165+
{
166+
int4 state = PG_GETARG_INT32(0);
167+
int4 value = PG_GETARG_INT32(1);
168+
169+
PGARRAY *p = GetPGArray(state, 1);
170+
if(!p)
171+
{
172+
elog(ERROR,"No aggregate storage\n");
173+
}
174+
else if(p->items >= p->lower)
175+
{
176+
elog(ERROR,"aggregate storage too small\n");
177+
}
178+
else
179+
{
180+
p->array[p->items++]= value;
181+
}
182+
PG_RETURN_INT32(p);
183+
}
184+
185+
/* This is the final function used for the integer aggregator. It returns all the integers
186+
* collected as a one dimentional integer array */
187+
Datum int_agg_final_array(PG_FUNCTION_ARGS)
188+
{
189+
PGARRAY *pnew = ShrinkPGArray(GetPGArray(PG_GETARG_INT32(0),0));
190+
if(pnew)
191+
{
192+
PG_RETURN_POINTER(pnew);
193+
}
194+
else
195+
{
196+
PG_RETURN_NULL();
197+
}
198+
}
199+
200+
/* This function accepts an array, and returns one item for each entry in the array */
201+
Datum int_enum(PG_FUNCTION_ARGS)
202+
{
203+
CTX *pc;
204+
PGARRAY *p = (PGARRAY *) PG_GETARG_POINTER(0);
205+
ReturnSetInfo *rsi = (ReturnSetInfo *)fcinfo->resultinfo;
206+
207+
if(!p)
208+
{
209+
elog(NOTICE, "No data sent\n");
210+
return 0;
211+
}
212+
if(!rsi)
213+
{
214+
elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer");
215+
PG_RETURN_NULL();
216+
217+
}
218+
if(!fcinfo->context)
219+
{
220+
/* Allocate a working context */
221+
pc = (CTX *) palloc(sizeof(CTX));
222+
223+
if(!pc)
224+
{
225+
elog(ERROR, "CTX Alocation failed\n");
226+
PG_RETURN_NULL();
227+
}
228+
229+
/* Don't copy atribute if you don't need too */
230+
if(VARATT_IS_EXTENDED(p) )
231+
{
232+
/* Toasted!!! */
233+
pc->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(p);
234+
pc->flags = TOASTED;
235+
if(!pc->p)
236+
{
237+
elog(ERROR, "Error in toaster!!! no detoasting\n");
238+
PG_RETURN_NULL();
239+
}
240+
}
241+
else
242+
{
243+
/* Untoasted */
244+
pc->p = p;
245+
pc->flags = 0;
246+
}
247+
fcinfo->context = (Node *) pc;
248+
pc->num=0;
249+
}
250+
else /* use an existing one */
251+
{
252+
pc = (CTX *) fcinfo->context;
253+
}
254+
/* Are we done yet? */
255+
if(pc->num >= pc->p->items)
256+
{
257+
/* We are done */
258+
if(pc->flags & TOASTED)
259+
pfree(pc->p);
260+
pfree(fcinfo->context);
261+
fcinfo->context = NULL;
262+
rsi->isDone = ExprEndResult ;
263+
}
264+
else /* nope, return the next value */
265+
{
266+
int val = pc->p->array[pc->num++];
267+
rsi->isDone = ExprMultipleResult;
268+
PG_RETURN_INT32(val);
269+
}
270+
PG_RETURN_NULL();
271+
}

0 commit comments

Comments
 (0)