// AVX2.cpp
/* *****************************************************************************
trimAl v2.0: a tool for automated alignment trimming in large-scale
phylogenetics analyses.
readAl v2.0: a tool for automated alignment conversion among different
formats.
2022-2023
Larralde, M. ([email protected])
2009-2019
Fernandez-Rodriguez V. ([email protected])
Capella-Gutierrez S. ([email protected])
Gabaldon, T. ([email protected])
This file is part of trimAl/readAl.
trimAl/readAl are free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, the last available version.
trimAl/readAl are distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with trimAl/readAl. If not, see <http://www.gnu.org/licenses/>.
***************************************************************************** */
#include <climits>
#include <cstdint>
#include <immintrin.h>
#include "Alignment/Alignment.h"
#include "InternalBenchmarker.h"
#include "Statistics/Gaps.h"
#include "Statistics/Manager.h"
#include "Statistics/Similarity.h"
#include "Platform/x86/AVX2.h"
#include "Platform/template.h"
#include "defines.h"
#include "reportsystem.h"
#include "utils.h"
/**
 * Value wrapper around one 256-bit AVX2 register treated as 32 unsigned
 * 8-bit lanes.
 *
 * The class is used as the vector type argument of the simd:: function
 * templates instantiated in this file; every operation maps to one or two
 * AVX/AVX2 intrinsics.
 */
class AVX2Vector {
private:
  __m256i vector;

  // Wraps a raw register. Kept private (and explicit) so that outside code can
  // only obtain vectors through the named factories below.
  inline explicit AVX2Vector(__m256i vec) : vector(vec) {}

public:
  /** Number of 8-bit lanes in one vector. */
  const static size_t LANES = 32;
  /** Size of one vector in bytes. */
  const static size_t SIZE = sizeof(__m256i);

  /** Creates a vector with all lanes set to zero. */
  inline AVX2Vector() : vector(_mm256_setzero_si256()) {}

  /** Returns a vector whose 32 lanes all hold `value`. */
  inline static AVX2Vector duplicate(const uint8_t value) {
    // _mm256_set1_epi8 takes a char; the cast keeps the bit pattern.
    return AVX2Vector(_mm256_set1_epi8(static_cast<char>(value)));
  }

  /** Loads 32 bytes from `data`, which must be 32-byte aligned. */
  inline static AVX2Vector load(const uint8_t *data) {
    return AVX2Vector(
        _mm256_load_si256(reinterpret_cast<const __m256i *>(data)));
  }

  /** Loads 32 bytes from `data` without any alignment requirement. */
  inline static AVX2Vector loadu(const uint8_t *data) {
    return AVX2Vector(
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(data)));
  }

  /** Stores the 32 lanes to `data`, which must be 32-byte aligned. */
  inline void store(uint8_t *data) const {
    _mm256_store_si256(reinterpret_cast<__m256i *>(data), vector);
  }

  /** Stores the 32 lanes to `data` without any alignment requirement. */
  inline void storeu(uint8_t *data) const {
    _mm256_storeu_si256(reinterpret_cast<__m256i *>(data), vector);
  }

  /** Lane-wise 8-bit addition (wraps around on overflow). */
  inline AVX2Vector &operator+=(const AVX2Vector &rhs) {
    vector = _mm256_add_epi8(vector, rhs.vector);
    return *this;
  }

  /** Lane-wise equality: a lane is 0xFF where both operands match, else 0. */
  inline AVX2Vector operator==(const AVX2Vector &rhs) const {
    return AVX2Vector(_mm256_cmpeq_epi8(vector, rhs.vector));
  }

  /** Bitwise AND. */
  inline AVX2Vector operator&(const AVX2Vector &rhs) const {
    return AVX2Vector(_mm256_and_si256(vector, rhs.vector));
  }

  /** Bitwise OR. */
  inline AVX2Vector operator|(const AVX2Vector &rhs) const {
    return AVX2Vector(_mm256_or_si256(vector, rhs.vector));
  }

  /** Bitwise complement of every lane. */
  inline AVX2Vector operator!() const {
    // andnot(a, b) computes (~a) & b, so an all-ones `b` yields ~vector.
    return AVX2Vector(
        _mm256_andnot_si256(vector, _mm256_set1_epi8(static_cast<char>(0xFF))));
  }

  /** Returns `*this & ~rhs`. */
  inline AVX2Vector andnot(const AVX2Vector &rhs) const {
    // The intrinsic negates its FIRST operand, hence the swapped order.
    return AVX2Vector(_mm256_andnot_si256(rhs.vector, vector));
  }

  /**
   * Horizontal sum of the 32 unsigned 8-bit lanes.
   *
   * @return the sum of all lanes; at most 32 * 255 = 8160, so it always fits
   *         in the returned uint16_t.
   */
  inline uint16_t sum() const {
    // SAD against zero adds each group of 8 bytes into one 64-bit lane,
    // giving four partial sums of at most 8 * 255 = 2040.
    const __m256i sum256 = _mm256_sad_epu8(vector, _mm256_setzero_si256());
    // Fold the upper 128-bit half onto the lower one: two 64-bit partial sums
    // of at most 4080 remain.
    const __m128i sum128 = _mm_add_epi64(_mm256_extractf128_si256(sum256, 1),
                                         _mm256_castsi256_si128(sum256));
    // Both partial sums fit in the low 32 bits of their 64-bit lane (32-bit
    // elements 0 and 2), so a 32-bit extraction returns the same value as a
    // 64-bit one while also being available on 32-bit x86 targets, where
    // _mm_extract_epi64 does not exist. This removes the dependency on the
    // CPU_FEATURES_ARCH_X86_64 macro being visible in this translation unit.
    return static_cast<uint16_t>(_mm_extract_epi32(sum128, 0) +
                                 _mm_extract_epi32(sum128, 2));
  }

  /** Resets every lane to zero. */
  inline void clear() { vector = _mm256_setzero_si256(); }
};
namespace statistics {

/**
 * AVX2 entry point for the similarity vectors: forwards to the generic SIMD
 * routine instantiated with AVX2Vector.
 *
 * @param cutByGap passed through unchanged to
 *                 simd::calculateSimilarityVectors.
 * @return the value returned by simd::calculateSimilarityVectors.
 */
bool AVX2Similarity::calculateVectors(bool cutByGap) {
  StartTiming("bool AVX2Similarity::calculateVectors(bool cutByGap) ");
  return simd::calculateSimilarityVectors<AVX2Vector>(*this, cutByGap);
}

/**
 * AVX2 entry point for the gap vectors: forwards to the generic SIMD routine
 * instantiated with AVX2Vector.
 */
void AVX2Gaps::CalculateVectors() {
  // The timing label mirrors the real signature (this method returns void),
  // consistent with the labels of the other methods in this namespace.
  StartTiming("void AVX2Gaps::CalculateVectors() ");
  simd::calculateGapVectors<AVX2Vector>(*this);
}

/**
 * AVX2 entry point for the sequence identity computation: forwards to the
 * generic SIMD routine instantiated with AVX2Vector.
 */
void AVX2Identity::calculateSeqIdentity() {
  StartTiming("void AVX2Identity::calculateSeqIdentity() ");
  simd::calculateSeqIdentity<AVX2Vector>(*this);
}

/**
 * AVX2 entry point for the spurious vector computation: forwards to the
 * generic SIMD routine instantiated with AVX2Vector.
 *
 * @param overlap        passed through unchanged to
 *                       simd::calculateSpuriousVector.
 * @param spuriousVector passed through unchanged to
 *                       simd::calculateSpuriousVector.
 * @return the value returned by simd::calculateSpuriousVector.
 */
bool AVX2Overlap::calculateSpuriousVector(float overlap, float *spuriousVector) {
  StartTiming("bool AVX2Overlap::calculateSpuriousVector(float overlap, float "
              "*spuriousVector) ");
  return simd::calculateSpuriousVector<AVX2Vector>(*this, overlap,
                                                   spuriousVector);
}

} // namespace statistics