/***************************************************************************
 * blitz/tuning.h      Platform-specific code tuning
 *
 * $Id: tuning.h,v 1.4 2003/01/14 11:29:18 patricg Exp $
 *
 * Copyright (C) 1997-2001 Todd Veldhuizen <tveldhui@oonumerics.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Suggestions:          blitz-dev@oonumerics.org
 * Bugs:                 blitz-bugs@oonumerics.org
 *
 * For more information, please see the Blitz++ Home Page:
 *    http://oonumerics.org/blitz/
 *
 ***************************************************************************/
---|
25 | |
---|
26 | #ifndef BZ_TUNING_H |
---|
27 | #define BZ_TUNING_H |
---|
28 | |
---|
/*
 * Estimated cache sizes.  Keep these conservative: an estimate should
 * underestimate, never overestimate, the cache it describes.
 */
#define BZ_L1_CACHE_ESTIMATED_SIZE    8192
#define BZ_L2_CACHE_ESTIMATED_SIZE    65536
---|
33 | |
---|
34 | |
---|
/*
 * Default tuning switches.  Every switch is forced to a known state here;
 * the platform-specific sections further down this file adjust some of them.
 */

// Switches that are off by default.
#undef  BZ_PARTIAL_LOOP_UNROLL
#undef  BZ_PTR_INC_FASTER_THAN_INDIRECTION
#undef  BZ_KCC_COPY_PROPAGATION_KLUDGE
#undef  BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
#undef  BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
#undef  BZ_ARRAY_SPACE_FILLING_TRAVERSAL
#undef  BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#undef  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#undef  BZ_INTERLACE_ARRAYS
#undef  BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY

// Switches that are on by default.
#define BZ_PASS_EXPR_BY_VALUE
#define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
#define BZ_INLINE_GROUP1
#define BZ_INLINE_GROUP2
#define BZ_COLLAPSE_LOOPS
#define BZ_USE_FAST_READ_ARRAY_EXPR
#define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
#define BZ_ARRAY_2D_STENCIL_TILING
#define BZ_FAST_COMPILE

// Numeric parameter accompanying BZ_ARRAY_2D_STENCIL_TILING.
#define BZ_ARRAY_2D_STENCIL_TILE_SIZE 128
---|
56 | |
---|
57 | |
---|
// BZ_NEW_EXPRESSION_TEMPLATES is defined unless BZ_DISABLE_NEW_ET was
// already defined when this header is read.
#if !defined(BZ_DISABLE_NEW_ET)
#define BZ_NEW_EXPRESSION_TEMPLATES
#endif
---|
61 | |
---|
// BZ_FAST_COMPILE switches on both settings below: BZ_ETPARMS_CONSTREF
// (BZ_ETPARM(X) becomes a const reference) and BZ_NO_INLINE_ET
// (_bz_inline_et expands to nothing).
#if defined(BZ_FAST_COMPILE)
#define BZ_NO_INLINE_ET
#define BZ_ETPARMS_CONSTREF
#endif
---|
66 | |
---|
/*
 * Platform-specific tuning
 */

// Library-side loop unrolling is switched off for compilers that unroll
// well on their own: the backend compiler on the Cray T3E, and the GNU
// (egcs) compiler when -funroll-loops is used.
#if defined(_CRAYT3E) || defined(__GNUC__)
#undef BZ_PARTIAL_LOOP_UNROLL
#undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif
---|
86 | |
---|
// Explicit opt-out: when BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE is
// defined, BZ_KCC_COPY_PROPAGATION_KLUDGE is guaranteed to be undefined.
#if defined(BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE)
#undef BZ_KCC_COPY_PROPAGATION_KLUDGE
#endif
---|
90 | |
---|
/*
 * Inlining qualifiers.  Each macro expands either to `inline` or to
 * nothing:
 *   _bz_inline1    is `inline` when BZ_INLINE_GROUP1 is defined
 *   _bz_inline2    is `inline` when BZ_INLINE_GROUP2 is defined
 *   _bz_inline_et  is `inline` unless BZ_NO_INLINE_ET is defined
 */
#if defined(BZ_INLINE_GROUP1)
#define _bz_inline1 inline
#else
#define _bz_inline1
#endif

#if defined(BZ_INLINE_GROUP2)
#define _bz_inline2 inline
#else
#define _bz_inline2
#endif

#if !defined(BZ_NO_INLINE_ET)
#define _bz_inline_et inline
#else
#define _bz_inline_et
#endif
---|
108 | |
---|
// BZ_ETPARM(X) spells how a value of type X is passed: as plain X by
// default, or as `const X&` when BZ_ETPARMS_CONSTREF is defined.
#if !defined(BZ_ETPARMS_CONSTREF)
#define BZ_ETPARM(X) X
#else
#define BZ_ETPARM(X) const X&
#endif
---|
114 | |
---|
// DEC cxx: aliasing gives this compiler trouble with loop unrolling, so
// Blitz++ does the loop unrolling and anti-aliasing itself.
#if defined(__DECCXX)
#define BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#define BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#define BZ_PARTIAL_LOOP_UNROLL
#endif
---|
124 | |
---|
/*
 * BZ_NO_PROPAGATE(X) prevents the compiler from performing
 * copy propagation on the variable X.  This is used for loop
 * unrolling to prevent KAI C++ from rearranging the
 * ordering of memory accesses.
 *
 * By default the macro is a no-op that expands to X itself.  The
 * KAI-specific form replaces it only when compiling with KAI C++
 * (__KCC) and BZ_USE_NO_PROPAGATE is defined.
 */

#define BZ_NO_PROPAGATE(X) X

#if defined(__KCC) && defined(BZ_USE_NO_PROPAGATE)
    extern "C" int __kai_apply(const char*, ...);

    // #undef takes the bare macro name only; a parameter list after
    // the name is not part of the directive.
    #undef BZ_NO_PROPAGATE
    // The argument is parenthesized so that & applies to all of it.
    #define BZ_NO_PROPAGATE(X) __kai_apply("(%a)",&(X))
#endif
---|
142 | |
---|
143 | #endif // BZ_TUNING_H |
---|