-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.h
170 lines (143 loc) · 5.45 KB
/
config.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/**********************************************************************
* BSOF/I - Block structured factorization and inversion codes
* for CPU+GPU platforms
* Copyright (c) 2013, Sergiy Gogolenko
* e-mail: [email protected]
**********************************************************************
* Description:
* Configuration file
*/
#ifndef __CONFIG_H__
#define __CONFIG_H__
#pragma once
/* Build-time configuration switches. */
/* Precision switch: left undefined here, so scalar_t (further down in this
 * header) is double; defining it would select float instead. */
/* #undef __SINGLE_PREC__ */
/* NOTE(review): USE_FORTRAN is not referenced anywhere else in this header;
 * presumably it is consumed by the sources that include it - confirm. */
#define USE_FORTRAN
/* Token that FROUTINE/FCOMMONBLOCK paste onto the end of a name
 * (here a single trailing underscore). */
#define F77MANGLING _
/**************************************************
* Definitions for portability *
**************************************************/
/*
 * Name mangling helpers (intended for Fortran routine and common-block
 * names, judging by the macro names).
 *
 * FROUTINE(lower, UPPER): the lower-case spelling with F77MANGLING pasted
 *     onto its end. The upper-case spelling is accepted but not used by
 *     this mangling scheme.
 * FCOMMONBLOCK(name): `name` with F77MANGLING pasted onto its end.
 *
 * Both rely on CONCAT, which is defined further down in this header; that
 * is fine because macros are only expanded at the point of use.
 */
#define FROUTINE(lower_name, UPPER_NAME) CONCAT(lower_name, F77MANGLING)
#define FCOMMONBLOCK(block_name) CONCAT(block_name, F77MANGLING)
/*
 * USERDEF_PRAGMA(directive): emit `directive` as a pragma from inside a
 * macro expansion.
 *   - _Pragma operator when __C99__ is defined, when compiling as C++ with
 *     __cplusplus above 199711, or with an Intel compiler whose
 *     __INTEL_COMPILER is above 1000;
 *   - __pragma keyword for Microsoft compilers;
 *   - otherwise the directive is silently dropped.
 * NOTE(review): __C99__ is not a standard predefined macro; presumably the
 * build system supplies it - confirm.
 */
#if defined(__C99__) \
 || (defined(__cplusplus) && (__cplusplus > 199711)) \
 || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER > 1000))
#  define USERDEF_PRAGMA(pragma_text) _Pragma(#pragma_text)
#elif defined(_MSC_VER)
#  define USERDEF_PRAGMA(pragma_text) __pragma(pragma_text)
#else
/* #warning User defined pragmas are not used */
#  define USERDEF_PRAGMA(pragma_text)
#endif
/*
 * Compiler-specific helpers.
 *   FORCE_INLINE      - inlining keyword for function definitions
 *   RESTRICT          - no-alias pointer qualifier
 *   FORCE_VECT_LOOP   - pragma placed in front of a loop (via USERDEF_PRAGMA)
 *   FORCE_VECT_LOOP2  - same, always using the "vector always" directive
 *   ASSUME_ALIGNED(P) - yields P; with GCC >= 4.7 it goes through
 *                       __builtin_assume_aligned(P, USE_ALIGN)
 *
 * USE_ALIGN is not defined in this header; it must be visible wherever
 * ASSUME_ALIGNED is expanded with GCC >= 4.7.
 *
 * Compilers other than Intel, GNU-compatible and Microsoft ones get neutral
 * definitions from the final #else branch, so code using these macros
 * still compiles there (without the hints).
 *
 * NOTE(review): "vector always" is an Intel directive; GCC does not
 * recognise it. Confirm whether the GNU branch should use another pragma.
 */
#if defined __INTEL_COMPILER
# define FORCE_INLINE inline
# define RESTRICT __restrict__
# define FORCE_VECT_LOOP USERDEF_PRAGMA(ivdep)/*simd statement*/
# define FORCE_VECT_LOOP2 USERDEF_PRAGMA(vector always)
# define ASSUME_ALIGNED(P) (P)
#elif defined __GNUC__
# define FORCE_INLINE inline /* attribute(always_inline) */
# define RESTRICT __restrict__
# define FORCE_VECT_LOOP USERDEF_PRAGMA(vector always)
# define FORCE_VECT_LOOP2 USERDEF_PRAGMA(vector always)
# if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)
# define ASSUME_ALIGNED(P) __builtin_assume_aligned (P, USE_ALIGN)
# else
# define ASSUME_ALIGNED(P) (P)
# endif
#elif defined _MSC_VER
# define FORCE_INLINE inline /* __forceinline */
# define RESTRICT __restrict
# define FORCE_VECT_LOOP USERDEF_PRAGMA(ivdep)
# define FORCE_VECT_LOOP2 USERDEF_PRAGMA(vector always)
# define ASSUME_ALIGNED(P) (P)
#else
/* Unknown compiler: use only what the language standard guarantees. */
# if defined(__cplusplus)
# define FORCE_INLINE inline
# define RESTRICT
# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
# define FORCE_INLINE inline
# define RESTRICT restrict
# else
# define FORCE_INLINE
# define RESTRICT
# endif
# define FORCE_VECT_LOOP
# define FORCE_VECT_LOOP2
# define ASSUME_ALIGNED(P) (P)
#endif
/*
 * WARNING_PRAGMA(text): print `text` as a compile-time message.
 *   - Microsoft compilers: expands to the `message` pragma; its documented
 *     form takes the string in parentheses, so the stringified text is
 *     wrapped accordingly.
 *   - Other compilers: expands to nothing, so uses of the macro still
 *     compile (the message is simply not shown).
 */
#ifdef _MSC_VER
# define WARNING_PRAGMA(text) __pragma(message(#text))
#else
# define WARNING_PRAGMA(text)
#endif
/*
 * SET_NUM_BLAS_THREADS(N): forwards N to mkl_set_num_threads() when the
 * build defines HAS_MKL; otherwise it expands to nothing and N is not
 * evaluated.
 */
#ifndef HAS_MKL
# define SET_NUM_BLAS_THREADS(nthreads)
#else
# include <mkl_service.h>
# define SET_NUM_BLAS_THREADS(nthreads) mkl_set_num_threads(nthreads)
#endif
/**************************************************
* Macro for processor control *
**************************************************/
/*
 * DISABLE_SSE_EXCEPTIONS(): read the SSE control/status register (MXCSR),
 * OR in 32832 (0x8040) and write it back. Per the Intel SDM, 0x8040 is
 * bit 15 (flush-to-zero) plus bit 6 (denormals-are-zero); the exception
 * mask bits themselves are left as they were.
 *
 * Only available with GNU-compatible compilers targeting x86; everywhere
 * else the macro expands to nothing.
 *
 * Implementation notes:
 *   - `aux` is an output operand ("=m") because stmxcsr stores into it
 *     before anything reads it;
 *   - __asm__ __volatile__ keeps the statement available in strict ISO
 *     modes and prevents the compiler from discarding it as unused;
 *   - the expansion stays a plain { } block so that existing call sites
 *     written with or without a trailing semicolon keep compiling.
 */
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#define DISABLE_SSE_EXCEPTIONS() { \
    unsigned int aux; \
    __asm__ __volatile__( \
        "stmxcsr %[aux] \n\t" \
        "orl $32832, %[aux] \n\t" \
        "ldmxcsr %[aux] \n\t" \
        : [aux] "=m" (aux) : : "cc"); \
}
#else
#define DISABLE_SSE_EXCEPTIONS()
#endif
/**************************************************
* Definitions for profiling & benchmarking *
**************************************************/
/*
 * Profiling wrappers, active when USE_PROF is defined.
 *
 * PRROUTINE(f77name, F77NAME): FROUTINE applied to the names prefixed with
 *     "my" / "MY". Only defined when USE_PROF is defined.
 *     NOTE(review): there is no non-profiling definition of PRROUTINE in
 *     this header - confirm that every use is guarded by USE_PROF.
 *
 * PROF(COUNTER, FLOPS, code...): run `code`.
 *     With USE_PROF, the code is bracketed by PROFILE_BEGIN(fcnt<COUNTER>)
 *     and PROFILE_END(fcnt<COUNTER>) followed by
 *     PROFILE_INC_FLOPS_CNT(COUNTER, FLOPS); the bracketing runs only when
 *     `tid` is zero. `tid` and the PROFILE_* macros are not defined here
 *     and must be visible at the point of use.
 *     Without USE_PROF, it expands to `code` alone; COUNTER and FLOPS are
 *     ignored.
 *     `code` is taken as a variadic tail so that it may contain top-level
 *     commas; calls with a single code argument expand exactly as before.
 */
#ifdef USE_PROF
# define PRROUTINE(f77name, F77NAME) \
    FROUTINE(CONCAT(my, f77name), CONCAT(MY, F77NAME))
# define PROF(COUNTER, FLOPS, ...) { \
    if(!tid) { \
        PROFILE_BEGIN(fcnt ## COUNTER);} \
    __VA_ARGS__; if(!tid) { \
        PROFILE_END(fcnt ## COUNTER); \
        PROFILE_INC_FLOPS_CNT(COUNTER, FLOPS);} \
}
#else
# define PROF(COUNTER, FLOPS, ...) __VA_ARGS__
#endif
/**************************************************
* Macro for debugging *
**************************************************/
/*
 * DBGPRINTF(fmt, ...): fprintf to stderr, prefixed with
 *     "[file <file>, line <line>]: ".
 * DBGERROR(fmt, ...): fprintf to stderr, prefixed with
 *     "Error[<file>, l<line>]: " and followed by a newline.
 *
 * In both macros `fmt` must be a string literal, because it is
 * concatenated with the prefix at compile time. The format and its
 * arguments are split with __ARGS_FIRST__ / __ARGS_REST__ (defined further
 * down in this header), so a call with a format and no further arguments
 * is valid; those helpers support at most 9 arguments in total.
 *
 * This header does not include <stdio.h>; the including file must.
 */
#define DBGPRINTF(...) \
    fprintf(stderr, "[file %s, line %d]: " \
            __ARGS_FIRST__(__VA_ARGS__), \
            __FILE__, __LINE__ __ARGS_REST__(__VA_ARGS__))
/* Valid for GCC, but doesn't work with ICC */
/* #define DBGERROR(__fmt, ...) DBGPRINTF("Error>> " __fmt "\n", __VA_ARGS__) */
#define DBGERROR(...) \
    fprintf(stderr, "Error[%s, l%d]: " \
            __ARGS_FIRST__(__VA_ARGS__) "\n", \
            __FILE__, __LINE__ __ARGS_REST__(__VA_ARGS__))
/**************************************************
* Precision related definitions *
**************************************************/
/*
 * scalar_t: the floating-point type selected by the precision switch -
 * float when __SINGLE_PREC__ is defined, double otherwise.
 */
#ifndef __SINGLE_PREC__
typedef double scalar_t;
#else
typedef float scalar_t;
#endif
/**************************************************
* Extra macro *
**************************************************/
/*
 * Token pasting of two names.
 * CONCAT_NONMACRO pastes its arguments exactly as written.
 * CONCAT forwards to it so that arguments which are themselves macros are
 * expanded first and only then pasted.
 */
#define CONCAT_NONMACRO(LHS, RHS) LHS ## RHS
#define CONCAT(LHS, RHS) CONCAT_NONMACRO(LHS, RHS)
/* Splitting __VA_ARGS__ into its first argument and the rest.
 * __ARGS_FIRST__ expands to the first argument.
 * __ARGS_REST__: if there's only one argument, expands to nothing.
 * If there is more than one argument, expands to a comma
 * followed by everything but the first argument.
 *
 * This trick works equally well with GCC and ICC.
 * Macros provided: __ARGS_FIRST__ and __ARGS_REST__
 * Attention: we implement support for up to 9 arguments!
 * Based on:
 * http://stackoverflow.com/questions/5588855/standard-alternative-to-gccs-va-args-trick
 */
/* Selects its 10th argument; anything after it is discarded. */
#define __ARGS_SELECT_10TH__(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, ...) p10
/* Classifies an argument list of 1..9 items: the padding shifts ONE into
 * the 10th slot for a single argument and TWOORMORE for 2..9 arguments. */
#define __ARGS_NUM__(...) \
    __ARGS_SELECT_10TH__(__VA_ARGS__, \
        TWOORMORE, TWOORMORE, TWOORMORE, TWOORMORE, \
        TWOORMORE, TWOORMORE, TWOORMORE, TWOORMORE, \
        ONE, throwaway)
/* First argument. The extra dummy argument keeps the variadic tail of
 * __ARGS_GET_FIRST__ non-empty when only one argument is supplied. */
#define __ARGS_GET_FIRST__(head, ...) head
#define __ARGS_FIRST__(...) __ARGS_GET_FIRST__(__VA_ARGS__, __throwaway)
/* Emitters chosen by the ONE / TWOORMORE tag: nothing for a single
 * argument, a comma plus the remaining arguments otherwise. */
#define __ARGS_PUT_ONE(only)
#define __ARGS_PUT_TWOORMORE(head, ...) , __VA_ARGS__
/* Two forwarding levels so that the tag is fully expanded before it is
 * pasted onto __ARGS_PUT_. */
#define __ARGS_PUT_NAMED__(tag, ...) __ARGS_PUT_##tag(__VA_ARGS__)
#define __ARGS_GET_REST__(tag, ...) __ARGS_PUT_NAMED__(tag, __VA_ARGS__)
/* Everything after the first argument, with a leading comma (or nothing). */
#define __ARGS_REST__(...) __ARGS_GET_REST__(__ARGS_NUM__(__VA_ARGS__), __VA_ARGS__)
#endif