Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3e5f941

Browse files
committed
Reduce the memory requirement for large ispell dictionaries.
This patch eliminates per-chunk palloc overhead for most small allocations needed in the representation of an ispell dictionary. This saves close to a factor of 2 on the current Czech ispell data. While it doesn't cover every last small allocation in the ispell code, we are at the point of diminishing returns, because about 95% of the allocations are covered already.

Pavel Stehule, rather heavily revised by Tom
1 parent 9b910de commit 3e5f941

File tree

2 files changed

+74
-12
lines changed

2 files changed

+74
-12
lines changed

src/backend/tsearch/spell.c

+70-12
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,63 @@ NIFinishBuild(IspellDict *Conf)
5959
/* Just for cleanliness, zero the now-dangling pointers */
6060
Conf->buildCxt = NULL;
6161
Conf->Spell = NULL;
62+
Conf->firstfree = NULL;
63+
}
64+
65+
66+
/*
67+
* "Compact" palloc: allocate without extra palloc overhead.
68+
*
69+
* Since we have no need to free the ispell data items individually, there's
70+
* not much value in the per-chunk overhead normally consumed by palloc.
71+
* Getting rid of it is helpful since ispell can allocate a lot of small nodes.
72+
*
73+
* We currently pre-zero all data allocated this way, even though some of it
74+
* doesn't need that. The cpalloc and cpalloc0 macros are just documentation
75+
* to indicate which allocations actually require zeroing.
76+
*/
77+
#define COMPACT_ALLOC_CHUNK 8192 /* must be > aset.c's allocChunkLimit */
78+
#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
79+
80+
static void *
81+
compact_palloc0(IspellDict *Conf, size_t size)
82+
{
83+
void *result;
84+
85+
/* Should only be called during init */
86+
Assert(Conf->buildCxt != NULL);
87+
88+
/* No point in this for large chunks */
89+
if (size > COMPACT_MAX_REQ)
90+
return palloc0(size);
91+
92+
/* Keep everything maxaligned */
93+
size = MAXALIGN(size);
94+
95+
/* Need more space? */
96+
if (size > Conf->avail)
97+
{
98+
Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
99+
Conf->avail = COMPACT_ALLOC_CHUNK;
100+
}
101+
102+
result = (void *) Conf->firstfree;
103+
Conf->firstfree += size;
104+
Conf->avail -= size;
105+
106+
return result;
107+
}
108+
109+
#define cpalloc(size) compact_palloc0(Conf, size)
110+
#define cpalloc0(size) compact_palloc0(Conf, size)
111+
112+
static char *
113+
cpstrdup(IspellDict *Conf, const char *str)
114+
{
115+
char *res = cpalloc(strlen(str) + 1);
116+
117+
strcpy(res, str);
118+
return res;
62119
}
63120

64121

@@ -186,7 +243,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
186243
{
187244
if (Conf->mspell)
188245
{
189-
Conf->mspell += 1024 * 20;
246+
Conf->mspell *= 2;
190247
Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
191248
}
192249
else
@@ -324,7 +381,7 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
324381
{
325382
if (Conf->maffixes)
326383
{
327-
Conf->maffixes += 16;
384+
Conf->maffixes *= 2;
328385
Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
329386
}
330387
else
@@ -389,9 +446,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
389446
Affix->flag = flag;
390447
Affix->type = type;
391448

392-
Affix->find = (find && *find) ? pstrdup(find) : VoidString;
449+
Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
393450
if ((Affix->replen = strlen(repl)) > 0)
394-
Affix->repl = pstrdup(repl);
451+
Affix->repl = cpstrdup(Conf, repl);
395452
else
396453
Affix->repl = VoidString;
397454
Conf->naffixes++;
@@ -843,8 +900,9 @@ MergeAffix(IspellDict *Conf, int a1, int a2)
843900
}
844901

845902
ptr = Conf->AffixData + Conf->nAffixData;
846-
*ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) +
847-
1 /* space */ + 1 /* \0 */ );
903+
*ptr = cpalloc(strlen(Conf->AffixData[a1]) +
904+
strlen(Conf->AffixData[a2]) +
905+
1 /* space */ + 1 /* \0 */ );
848906
sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
849907
ptr++;
850908
*ptr = NULL;
@@ -888,7 +946,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
888946
if (!nchar)
889947
return NULL;
890948

891-
rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
949+
rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
892950
rs->length = nchar;
893951
data = rs->data;
894952

@@ -982,7 +1040,7 @@ NISortDictionary(IspellDict *Conf)
9821040
{
9831041
curaffix++;
9841042
Assert(curaffix < naffix);
985-
Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag);
1043+
Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag);
9861044
}
9871045

9881046
Conf->Spell[i]->p.d.affix = curaffix;
@@ -1020,7 +1078,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
10201078
aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
10211079
naff = 0;
10221080

1023-
rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
1081+
rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
10241082
rs->length = nchar;
10251083
data = rs->data;
10261084

@@ -1036,7 +1094,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
10361094
if (naff)
10371095
{
10381096
data->naff = naff;
1039-
data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1097+
data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
10401098
memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
10411099
naff = 0;
10421100
}
@@ -1056,7 +1114,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
10561114
if (naff)
10571115
{
10581116
data->naff = naff;
1059-
data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1117+
data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
10601118
memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
10611119
naff = 0;
10621120
}
@@ -1097,7 +1155,7 @@ mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
10971155
if (cnt == 0)
10981156
return;
10991157

1100-
Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt);
1158+
Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
11011159
Affix->data->naff = (uint32) cnt;
11021160

11031161
cnt = 0;

src/include/tsearch/dicts/spell.h

+4
Original file line number | Diff line number | Diff line change
@@ -161,6 +161,10 @@ typedef struct
161161
SPELL **Spell;
162162
int nspell; /* number of valid entries in Spell array */
163163
int mspell; /* allocated length of Spell array */
164+
165+
/* These are used to allocate "compact" data without palloc overhead */
166+
char *firstfree; /* first free address (always maxaligned) */
167+
size_t avail; /* free space remaining at firstfree */
164168
} IspellDict;
165169

166170
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);

0 commit comments

Comments
 (0)