Skip to content

Commit bf24cec

Browse files
committed
perf: allocate Normalize buffers dynamically, store in AT
Large stack allocations have a performance implication due to stack clash protection. Dynamically allocate the buffers needed by Normalize in AT, instead. This leads to a large performance improvement.
1 parent 2f63692 commit bf24cec

6 files changed

Lines changed: 118 additions & 23 deletions

File tree

sources/declare.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,6 +1307,7 @@ extern void SubsInAll(PHEAD0);
13071307
extern void TransferBuffer(int,int,int);
13081308
extern int TakeIDfunction(PHEAD WORD *);
13091309
extern int MakeSetupAllocs(void);
1310+
extern NORMDATA *AllocNormData(void);
13101311
extern int TryFileSetups(void);
13111312
extern void ExchangeExpressions(int,int);
13121313
extern void ExchangeDollars(int,int);

sources/normal.c

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
/** @file normal.c
22
*
3-
* Mainly the routine Normalize. This routine brings terms to standard
4-
* FORM. Currently it has one serious drawback. Its buffers are all
5-
* in the stack. This means these buffers have a fixed size (NORMSIZE).
6-
* In the past this has caused problems and NORMSIZE had to be increased.
3+
* Mainly the routine Normalize. This routine brings terms to standard
4+
* form. Its main buffers are in AT, but they have a fixed size controlled
5+
* by NORMSIZE, which limit the maximum complexity of terms which can be
6+
* normalized.
77
*
8-
* It is not clear whether Normalize can be called recursively.
8+
* Normalize is called recursively, currently via:
9+
* Normalize -> ExpandRat -> Normalize.
910
*/
1011
/* #[ License : */
1112
/*
@@ -185,8 +186,7 @@ int Commute(WORD *fleft, WORD *fright)
185186
186187
This is the big normalization routine. It has a great need
187188
to be economical.
188-
There is a fixed limit to the number of objects coming in.
189-
Something should be done about it.
189+
The limit on the number of objects coming in is given by NORMSIZE.
190190
191191
*/
192192

@@ -199,20 +199,41 @@ int Normalize(PHEAD WORD *term)
199199
WORD *t, *m, *r, i, j, k, l, nsym, *ss, *tt, *u;
200200
WORD shortnum, stype;
201201
WORD *stop, *to = 0, *from = 0;
202-
/*
203-
The next variables could be better off in the AT.WorkSpace (?)
204-
Now they make stackallocations rather bothersome.
205-
*/
206-
WORD psym[7*NORMSIZE],*ppsym;
207-
WORD pvec[NORMSIZE],*ppvec,nvec;
208-
WORD pdot[3*NORMSIZE],*ppdot,ndot;
209-
WORD pdel[2*NORMSIZE],*ppdel,ndel;
210-
WORD pind[NORMSIZE],nind;
211-
WORD *peps[NORMSIZE/3],neps;
212-
WORD *pden[NORMSIZE/3],nden;
213-
WORD *pcom[NORMSIZE],ncom;
214-
WORD *pnco[NORMSIZE],nnco;
215-
WORD *pcon[2*NORMSIZE],ncon; /* Pointer to contractable indices */
202+
203+
WORD *ppsym, *ppvec, *ppdot, *ppdel;
204+
WORD nvec, ndot, ndel, nind, neps, nden, ncom, nnco, ncon;
205+
206+
AT.NormDepth++;
207+
#ifdef DEBUGGING
208+
if ( AT.NormDepth > 2 ) {
209+
// We don't expect this to happen in the current codebase.
210+
Terminate(-1);
211+
}
212+
#endif
213+
if ( AT.NormDepth > AT.NormDataSize ) {
214+
NORMDATA **top = AT.NormData + AT.NormDataSize;
215+
DoubleBuffer((void **)&(AT.NormData), (void **)&(top),
216+
sizeof(*AT.NormData), "double NormData pointers");
217+
AT.NormDataSize *= 2;
218+
for ( LONG i = AT.NormDepth-1; i < AT.NormDataSize; i++ ) {
219+
AT.NormData[i] = NULL;
220+
}
221+
}
222+
if ( AT.NormData[AT.NormDepth-1] == NULL ) {
223+
AT.NormData[AT.NormDepth-1] = AllocNormData();
224+
}
225+
226+
WORD *psym = AT.NormData[AT.NormDepth-1]->psym;
227+
WORD *pvec = AT.NormData[AT.NormDepth-1]->pvec;
228+
WORD *pdot = AT.NormData[AT.NormDepth-1]->pdot;
229+
WORD *pdel = AT.NormData[AT.NormDepth-1]->pdel;
230+
WORD *pind = AT.NormData[AT.NormDepth-1]->pind;
231+
WORD **peps = AT.NormData[AT.NormDepth-1]->peps;
232+
WORD **pden = AT.NormData[AT.NormDepth-1]->pden;
233+
WORD **pcom = AT.NormData[AT.NormDepth-1]->pcom;
234+
WORD **pnco = AT.NormData[AT.NormDepth-1]->pnco;
235+
WORD **pcon = AT.NormData[AT.NormDepth-1]->pcon;
236+
216237
WORD *n_coef, ncoef; /* Accumulator for the coefficient */
217238
WORD *n_llnum, *lnum, nnum;
218239
WORD *termout, oldtoprhs = 0, subtype;
@@ -243,7 +264,12 @@ PrintTerm(term,"Normalize");
243264
didcontr = 0;
244265
ReplaceType = -1;
245266
t = term;
246-
if ( !*t ) { TermFree(n_coef,"NormCoef"); TermFree(n_llnum,"n_llnum"); return(regval); }
267+
if ( !*t ) {
268+
AT.NormDepth--;
269+
TermFree(n_coef,"NormCoef");
270+
TermFree(n_llnum,"n_llnum");
271+
return(regval);
272+
}
247273
r = t + *t;
248274
ncoef = r[-1];
249275
i = ABS(ncoef);
@@ -4134,6 +4160,7 @@ regularratfun:;
41344160
C->numrhs = oldtoprhs;
41354161
C->Pointer = C->Buffer + oldcpointer;
41364162
*/
4163+
AT.NormDepth--;
41374164
TermFree(n_llnum,"n_llnum");
41384165
TermFree(n_coef,"NormCoef");
41394166
return(1);
@@ -4161,6 +4188,7 @@ regularratfun:;
41614188
TermAssign(term);
41624189
}
41634190
*/
4191+
AT.NormDepth--;
41644192
TermFree(n_llnum,"n_llnum");
41654193
TermFree(n_coef,"NormCoef");
41664194
return(regval);
@@ -4186,11 +4214,13 @@ regularratfun:;
41864214
NormZero:
41874215
*term = 0;
41884216
AT.WorkPointer = termout;
4217+
AT.NormDepth--;
41894218
TermFree(n_llnum,"n_llnum");
41904219
TermFree(n_coef,"NormCoef");
41914220
return(regval);
41924221

41934222
NormMin:
4223+
AT.NormDepth--;
41944224
TermFree(n_llnum,"n_llnum");
41954225
TermFree(n_coef,"NormCoef");
41964226
return(-1);
@@ -4199,6 +4229,7 @@ regularratfun:;
41994229
MLOCK(ErrorMessageLock);
42004230
MesCall("Norm");
42014231
MUNLOCK(ErrorMessageLock);
4232+
AT.NormDepth--;
42024233
TermFree(n_llnum,"n_llnum");
42034234
TermFree(n_coef,"NormCoef");
42044235
return(-1);
@@ -5164,7 +5195,10 @@ void DropSymbols(PHEAD WORD *term)
51645195
int SymbolNormalize(WORD *term)
51655196
{
51665197
GETIDENTITY
5167-
WORD buffer[7*NORMSIZE], *t, *b, *bb, *tt, *m, *tstop;
5198+
WORD *t, *b, *bb, *tt, *m, *tstop;
5199+
// Here we use a stack-allocated array, since things are much smaller
5200+
// compared to the full Normalize routine.
5201+
WORD buffer[7*NORMSIZE];
51685202
int i;
51695203
b = buffer;
51705204
*b++ = SYMBOL; *b++ = 2;

sources/setfile.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,32 @@ int MakeSetupAllocs(void)
11251125

11261126
/*
11271127
#] MakeSetupAllocs :
1128+
#[ AllocNormData :
1129+
1130+
Allocate the arrays within the NORMDATA struct. These are dynamic,
1131+
such that valgrind can detect buffer overruns in these arrays.
1132+
*/
1133+
1134+
NORMDATA* AllocNormData(void)
1135+
{
1136+
NORMDATA* tmp = Malloc1(sizeof(NORMDATA), "NormData struct");
1137+
1138+
tmp->psym = Malloc1(7*NORMSIZE*sizeof(*(tmp->psym)), "Normalize struct psym");
1139+
tmp->pvec = Malloc1(1*NORMSIZE*sizeof(*(tmp->pvec)), "Normalize struct pvec");
1140+
tmp->pdot = Malloc1(3*NORMSIZE*sizeof(*(tmp->pdot)), "Normalize struct pdot");
1141+
tmp->pdel = Malloc1(2*NORMSIZE*sizeof(*(tmp->pdel)), "Normalize struct pdel");
1142+
tmp->pind = Malloc1(1*NORMSIZE*sizeof(*(tmp->pind)), "Normalize struct pind");
1143+
tmp->peps = Malloc1(NORMSIZE/3*sizeof(*(tmp->peps)), "Normalize struct peps");
1144+
tmp->pden = Malloc1(NORMSIZE/3*sizeof(*(tmp->pden)), "Normalize struct pden");
1145+
tmp->pcom = Malloc1(1*NORMSIZE*sizeof(*(tmp->pcom)), "Normalize struct pcom");
1146+
tmp->pnco = Malloc1(1*NORMSIZE*sizeof(*(tmp->pnco)), "Normalize struct pnco");
1147+
tmp->pcon = Malloc1(2*NORMSIZE*sizeof(*(tmp->pcon)), "Normalize struct pcon");
1148+
1149+
return tmp;
1150+
}
1151+
1152+
/*
1153+
#] AllocNormData :
11281154
#[ TryFileSetups :
11291155
11301156
Routine looks in the input file for a start of the type

sources/startup.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,6 +1232,11 @@ void StartVariables(void)
12321232
AR.wranfnpair1 = NPAIR1;
12331233
AR.wranfnpair2 = NPAIR2;
12341234
AR.wranfseed = 0;
1235+
1236+
AT.NormData = Malloc1(sizeof(*(AT.NormData)), "NormData pointers");
1237+
AT.NormDataSize = 1;
1238+
AT.NormData[0] = AllocNormData();
1239+
AT.NormDepth = 0;
12351240
#endif
12361241
AM.atstartup = 1;
12371242
AM.oldnumextrasymbols = strDup1((UBYTE *)"OLDNUMEXTRASYMBOLS_","oldnumextrasymbols");

sources/structs.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1373,6 +1373,26 @@ typedef struct {
13731373
WORD flags;
13741374
} TERMINFO;
13751375

1376+
/*
1377+
Struct to hold temporary data in Normalize, so that it is not allocated
1378+
on the stack each function call. The sizes of these arrays introduces a
1379+
maximum complexity of terms which can be normalized. Each array is
1380+
allocated dynamically, by AllocNormData, such that valgrind can detect
1381+
errors if they are over-run.
1382+
*/
1383+
typedef struct NoRmDaTa {
1384+
WORD *psym;
1385+
WORD *pvec;
1386+
WORD *pdot;
1387+
WORD *pdel;
1388+
WORD *pind;
1389+
WORD **peps;
1390+
WORD **pden;
1391+
WORD **pcom;
1392+
WORD **pnco;
1393+
WORD **pcon;
1394+
} NORMDATA;
1395+
13761396
/*
13771397
#] Varia :
13781398
#[ A :
@@ -2081,6 +2101,9 @@ struct T_const {
20812101
void *aux_;
20822102
void *auxr_;
20832103
#endif
2104+
NORMDATA **NormData;
2105+
LONG NormDataSize;
2106+
LONG NormDepth;
20842107
PARTI partitions;
20852108
LONG sBer; /* (T) Size of the Bernoullis buffer */
20862109
LONG pWorkPointer; /* (R) Offset-pointer in pWorkSpace */

sources/threads.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,12 @@ ALLPRIVATES *InitializeOneThread(int identity)
737737
AR.wranfnpair1 = NPAIR1;
738738
AR.wranfnpair2 = NPAIR2;
739739
AR.wranfseed = 0;
740+
741+
AT.NormData = Malloc1(sizeof(*(AT.NormData)), "NormData thread pointers");
742+
AT.NormDataSize = 1;
743+
AT.NormData[0] = AllocNormData();
744+
AT.NormDepth = 0;
745+
740746
AN.SplitScratch = 0;
741747
AN.SplitScratchSize = AN.InScratch = 0;
742748
AN.SplitScratch1 = 0;

0 commit comments

Comments
 (0)