-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.h
140 lines (102 loc) · 3.11 KB
/
utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#ifndef UTILS_H
#define UTILS_H
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "typedefs.h"
/// Declaration only; the definition is not part of this header.
/// NOTE(review): the name suggests it returns n rounded up to a padded size
/// (possibly related to ALIGNMENT) — confirm against the definition.
int get_size_with_padding(const int n);
/// Report whether an address is a multiple of ALIGNMENT.
///
/// @param ptr Address to inspect; it is only converted to an integer and is
///            never dereferenced.
/// @return 1 if the address is divisible by ALIGNMENT, 0 otherwise.
static inline int is_aligned(const void *restrict const ptr)
{
    if ((uintptr_t)ptr % ALIGNMENT != 0) {
        return 0;
    }
    return 1;
}
/// Return the smaller of two integers.
///
/// @param a First candidate.
/// @param b Second candidate.
/// @return a if a < b, otherwise b.
static inline int min(int a, int b)
{
    if (a < b) {
        return a;
    }
    return b;
}
/// Return the larger of two integers.
///
/// @param a First candidate.
/// @param b Second candidate.
/// @return a if a > b, otherwise b.
static inline int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}
/// Return the smaller of two doubles.
///
/// The result is x only when x < y holds. If either argument is NaN the
/// comparison is false and y is returned.
///
/// @param x First candidate.
/// @param y Second candidate.
/// @return x if x < y, otherwise y.
static inline double minf(double x, double y)
{
    if (x < y) {
        return x;
    }
    return y;
}
/// Return the larger of two doubles.
///
/// The result is x only when x > y holds. If either argument is NaN the
/// comparison is false and y is returned.
///
/// @param x First candidate.
/// @param y Second candidate.
/// @return x if x > y, otherwise y.
static inline double maxf(double x, double y)
{
    if (x > y) {
        return x;
    }
    return y;
}
/// Return the largest absolute value among the four entries of a 2-by-2
/// block.
///
/// The entries read are A[0], A[1], A[ldA] and A[ldA + 1].
///
/// @param A   Pointer to the first entry of the block.
/// @param ldA Offset between A[0] and the first entry of the second pair.
/// @return The maximum of the four magnitudes, combined with maxf().
static inline double find_absmax_in_2x2(const double *restrict const A, int ldA)
{
    // Magnitudes of the four entries, in the order they are folded together.
    const double abs_first = fabs(A[0]);
    const double abs_second = fabs(A[1]);
    const double abs_third = fabs(A[ldA]);
    const double abs_fourth = fabs(A[ldA + 1]);

    // Keep the running maximum as the first argument of maxf() each time.
    return maxf(maxf(maxf(abs_first, abs_second), abs_third), abs_fourth);
}
/// Return the smallest entry of an array of scaling factors, bounded above
/// by a build-dependent seed value.
///
/// The running minimum starts at 0 when INTSCALING is defined and at
/// INFINITY otherwise; that seed is returned unchanged when n <= 0 or when no
/// entry compares below it.
/// NOTE(review): with INTSCALING the result can never exceed 0 — presumably
/// integer scaling factors are non-positive exponents; confirm.
///
/// @param n Number of entries to inspect.
/// @param a Array holding at least n entries.
/// @return The minimum of the seed and a[0..n-1].
static inline scaling_t min_element(int n, const scaling_t *restrict const a)
{
#ifdef INTSCALING
    scaling_t result = 0;
#else
    scaling_t result = INFINITY;
#endif

    for (int k = 0; k < n; ++k) {
        const scaling_t candidate = a[k];
        if (candidate < result) {
            result = candidate;
        }
    }

    return result;
}
/// Overwrite an n-by-m block with zeros.
///
/// Entry (i, j) of the block lives at A[j * ldA + i]; the offset is computed
/// in size_t.
///
/// @param n   Number of entries written per column (range of i).
/// @param m   Number of columns (range of j).
/// @param A   Pointer to entry (0, 0) of the block.
/// @param ldA Offset between the starts of consecutive columns.
static inline void set_zero(int n, int m, double *restrict const A, int ldA)
{
    for (int col = 0; col < m; ++col) {
        // Offset of the first entry of this column.
        const size_t col_start = (size_t) col * ldA;

        for (int row = 0; row < n; ++row) {
            A[col_start + row] = 0.0;
        }
    }
}
/// Copy an n-by-m block from Ain to Bout.
///
/// Entry (i, j) is read from Ain[i + ldA * j] and written to
/// Bout[i + ldB * j]. Column offsets are computed in size_t (as set_zero()
/// does) so that ld * j cannot overflow int for large matrices.
///
/// @param n    Number of entries copied per column (range of i).
/// @param m    Number of columns copied (range of j).
/// @param Ain  Pointer to entry (0, 0) of the source block.
/// @param ldA  Offset between the starts of consecutive source columns.
/// @param Bout Pointer to entry (0, 0) of the destination block; must not
///             overlap Ain (both pointers are restrict-qualified).
/// @param ldB  Offset between the starts of consecutive destination columns.
static inline void copy_block(int n, int m,
                              const double *restrict const Ain, int ldA,
                              double *restrict const Bout, int ldB)
{
    for (int j = 0; j < m; j++) {
        // Widen before multiplying: j * ld in plain int is undefined
        // behaviour once the product exceeds INT_MAX.
        const size_t src_col = (size_t) j * ldA;
        const size_t dst_col = (size_t) j * ldB;

        for (int i = 0; i < n; i++) {
            Bout[dst_col + i] = Ain[src_col + i];
        }
    }
}
/// Declaration only; the definition is not part of this header.
/// NOTE(review): presumably copies a block-partitioned matrix from in_blocks
/// (leading dimension ldin) to out_blocks (leading dimension ldout), with p
/// describing the partitioning and layout selecting the storage scheme —
/// confirm against the definition.
void copy_matrix(
double ***in_blocks, int ldin,
double ***out_blocks, int ldout,
partitioning_t *p,
memory_layout_t layout);
/// Declarations only; the definitions are not part of this header.
/// NOTE(review): presumably these apply the scaling factor *beta in place to
/// a vector x of length n (scale), to an m-by-n tile X with leading dimension
/// ldX (scale_tile), and to such a tile while skipping the index window given
/// by ilo/ihi and jlo/jhi (scale_excluding). Whether the window bounds are
/// inclusive is not visible here — confirm against the definitions.
void scale(
int n, double *restrict const x, const scaling_t *beta);
void scale_tile(int m, int n,
double *restrict const X, int ldX, const scaling_t *beta);
void scale_excluding(int m, int n, int ilo, int ihi, int jlo, int jhi,
double *restrict const X, int ldX, const scaling_t *beta);
/// Declarations only; the definitions are not part of this header.
/// scaling_t is a project type whose representation depends on the build
/// (min_element() in this header seeds it differently under INTSCALING).
/// NOTE(review): the descriptions below are inferred from names and
/// signatures only — confirm against the definitions.

/// NOTE(review): presumably converts a scaling_t value to the double factor
/// it represents.
double convert_scaling(scaling_t alpha);
/// NOTE(review): presumably initialises the n entries of alpha to the
/// neutral scaling factor.
void init_scaling_factor(
int n, scaling_t *restrict const alpha);
/// NOTE(review): presumably returns the double factor that brings data
/// scaled by alpha to the common scaling alpha_min.
double compute_upscaling(
scaling_t alpha_min, scaling_t alpha);
/// NOTE(review): presumably like compute_upscaling() but also folds in an
/// additional scaling beta.
double compute_combined_upscaling(
scaling_t alpha_min, scaling_t alpha, scaling_t beta);
/// NOTE(review): presumably folds the scaling phi into *global.
void update_global_scaling(
scaling_t *global, scaling_t phi);
/// NOTE(review): presumably rescales *norm by the scaling phi.
void update_norm(double *norm, scaling_t phi);
/// Copy all selected eigenvalues to a compact memory representation.
///
/// Declaration only; the definition is not part of this header.
/// NOTE(review): presumably selected, lambda and lambda_type each hold n
/// entries, and the entries flagged in selected are packed contiguously into
/// compact_lambda / compact_lambda_type. The required size of the output
/// arrays is not visible here — confirm against the definition.
void compact_eigenvalues(int n, const int *restrict selected,
const double *restrict lambda, const int *restrict lambda_type,
double *restrict compact_lambda, int *restrict compact_lambda_type);
/// Declaration only; the definition is not part of this header.
/// NOTE(review): the parameter list mirrors BLAS DGEMM
/// (C := alpha * op(A) * op(B) + beta * C, with op selected by transa/transb
/// and ldA/ldB/ldC as leading dimensions) — presumably a C-callable wrapper;
/// confirm against the definition.
void dgemm(
const char transa, const char transb,
const int m, const int n, const int k,
const double alpha, const double *restrict const A, const int ldA,
const double *restrict const B, const int ldB,
const double beta, double *restrict C, const int ldC);
/// Declaration only; the definition is not part of this header.
/// NOTE(review): the parameter list mirrors LAPACK DLANGE (norm of an m-by-n
/// matrix A with leading dimension ldA, the norm type chosen by the `norm`
/// character) — presumably a C-callable wrapper; confirm against the
/// definition.
double dlange(const char norm, const int m, const int n,
const double *restrict const A, int ldA);
#endif