Vector Optimized Library of Kernels  2.3
Architecture-tuned implementations of math kernels
volk_64f_x2_min_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
72 #ifndef INCLUDED_volk_64f_x2_min_64f_a_H
73 #define INCLUDED_volk_64f_x2_min_64f_a_H
74 
75 #include <inttypes.h>
76 #include <stdio.h>
77 
78 #ifdef LV_HAVE_AVX512F
79 #include <immintrin.h>
80 
81 static inline void volk_64f_x2_min_64f_a_avx512f(double* cVector,
82  const double* aVector,
83  const double* bVector,
84  unsigned int num_points)
85 {
86  unsigned int number = 0;
87  const unsigned int eigthPoints = num_points / 8;
88 
89  double* cPtr = cVector;
90  const double* aPtr = aVector;
91  const double* bPtr = bVector;
92 
93  __m512d aVal, bVal, cVal;
94  for (; number < eigthPoints; number++) {
95 
96  aVal = _mm512_load_pd(aPtr);
97  bVal = _mm512_load_pd(bPtr);
98 
99  cVal = _mm512_min_pd(aVal, bVal);
100 
101  _mm512_store_pd(cPtr, cVal); // Store the results back into the C container
102 
103  aPtr += 8;
104  bPtr += 8;
105  cPtr += 8;
106  }
107 
108  number = eigthPoints * 8;
109  for (; number < num_points; number++) {
110  const double a = *aPtr++;
111  const double b = *bPtr++;
112  *cPtr++ = (a < b ? a : b);
113  }
114 }
115 #endif /* LV_HAVE_AVX512F */
116 
117 
118 #ifdef LV_HAVE_AVX
119 #include <immintrin.h>
120 
121 static inline void volk_64f_x2_min_64f_a_avx(double* cVector,
122  const double* aVector,
123  const double* bVector,
124  unsigned int num_points)
125 {
126  unsigned int number = 0;
127  const unsigned int quarterPoints = num_points / 4;
128 
129  double* cPtr = cVector;
130  const double* aPtr = aVector;
131  const double* bPtr = bVector;
132 
133  __m256d aVal, bVal, cVal;
134  for (; number < quarterPoints; number++) {
135 
136  aVal = _mm256_load_pd(aPtr);
137  bVal = _mm256_load_pd(bPtr);
138 
139  cVal = _mm256_min_pd(aVal, bVal);
140 
141  _mm256_store_pd(cPtr, cVal); // Store the results back into the C container
142 
143  aPtr += 4;
144  bPtr += 4;
145  cPtr += 4;
146  }
147 
148  number = quarterPoints * 4;
149  for (; number < num_points; number++) {
150  const double a = *aPtr++;
151  const double b = *bPtr++;
152  *cPtr++ = (a < b ? a : b);
153  }
154 }
155 #endif /* LV_HAVE_AVX */
156 
157 
158 #ifdef LV_HAVE_SSE2
159 #include <emmintrin.h>
160 
/*
 * Element-wise minimum of two double-precision vectors (SSE2, aligned).
 *
 * For every index i < num_points, stores the smaller of aVector[i] and
 * bVector[i] into cVector[i].
 *
 * \param cVector    output buffer, num_points doubles
 * \param aVector    first input buffer, num_points doubles
 * \param bVector    second input buffer, num_points doubles
 * \param num_points number of elements to process
 *
 * The vector loop uses the aligned load/store intrinsics, so all three
 * buffers must satisfy the 16-byte alignment those intrinsics require.
 */
static inline void volk_64f_x2_min_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    const unsigned int halfPoints = num_points / 2;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector body: two doubles per iteration. */
    for (unsigned int number = 0; number < halfPoints; number++) {
        const __m128d aVal = _mm_load_pd(aPtr);
        const __m128d bVal = _mm_load_pd(bPtr);
        const __m128d cVal = _mm_min_pd(aVal, bVal);

        _mm_store_pd(cPtr, cVal);

        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Scalar tail: at most one element when num_points is odd.
     * When the comparison is false (including unordered) b is stored. */
    for (unsigned int number = halfPoints * 2; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
195 #endif /* LV_HAVE_SSE2 */
196 
197 
198 #ifdef LV_HAVE_GENERIC
199 
/*
 * Element-wise minimum of two double-precision vectors (portable C).
 *
 * For every index i < num_points, stores the smaller of aVector[i] and
 * bVector[i] into cVector[i].
 *
 * \param cVector    output buffer, num_points doubles
 * \param aVector    first input buffer, num_points doubles
 * \param bVector    second input buffer, num_points doubles
 * \param num_points number of elements to process; 0 writes nothing
 */
static inline void volk_64f_x2_min_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    for (unsigned int number = 0; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        /* When the comparison is false (including unordered) b is stored. */
        *cPtr++ = (a < b ? a : b);
    }
}
216 #endif /* LV_HAVE_GENERIC */
217 
218 
219 #endif /* INCLUDED_volk_64f_x2_min_64f_a_H */
220 
221 #ifndef INCLUDED_volk_64f_x2_min_64f_u_H
222 #define INCLUDED_volk_64f_x2_min_64f_u_H
223 
224 #include <inttypes.h>
225 #include <stdio.h>
226 
227 #ifdef LV_HAVE_AVX512F
228 #include <immintrin.h>
229 
230 static inline void volk_64f_x2_min_64f_u_avx512f(double* cVector,
231  const double* aVector,
232  const double* bVector,
233  unsigned int num_points)
234 {
235  unsigned int number = 0;
236  const unsigned int eigthPoints = num_points / 8;
237 
238  double* cPtr = cVector;
239  const double* aPtr = aVector;
240  const double* bPtr = bVector;
241 
242  __m512d aVal, bVal, cVal;
243  for (; number < eigthPoints; number++) {
244 
245  aVal = _mm512_loadu_pd(aPtr);
246  bVal = _mm512_loadu_pd(bPtr);
247 
248  cVal = _mm512_min_pd(aVal, bVal);
249 
250  _mm512_storeu_pd(cPtr, cVal); // Store the results back into the C container
251 
252  aPtr += 8;
253  bPtr += 8;
254  cPtr += 8;
255  }
256 
257  number = eigthPoints * 8;
258  for (; number < num_points; number++) {
259  const double a = *aPtr++;
260  const double b = *bPtr++;
261  *cPtr++ = (a < b ? a : b);
262  }
263 }
264 #endif /* LV_HAVE_AVX512F */
265 
266 
267 #ifdef LV_HAVE_AVX
268 #include <immintrin.h>
269 
270 static inline void volk_64f_x2_min_64f_u_avx(double* cVector,
271  const double* aVector,
272  const double* bVector,
273  unsigned int num_points)
274 {
275  unsigned int number = 0;
276  const unsigned int quarterPoints = num_points / 4;
277 
278  double* cPtr = cVector;
279  const double* aPtr = aVector;
280  const double* bPtr = bVector;
281 
282  __m256d aVal, bVal, cVal;
283  for (; number < quarterPoints; number++) {
284 
285  aVal = _mm256_loadu_pd(aPtr);
286  bVal = _mm256_loadu_pd(bPtr);
287 
288  cVal = _mm256_min_pd(aVal, bVal);
289 
290  _mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
291 
292  aPtr += 4;
293  bPtr += 4;
294  cPtr += 4;
295  }
296 
297  number = quarterPoints * 4;
298  for (; number < num_points; number++) {
299  const double a = *aPtr++;
300  const double b = *bPtr++;
301  *cPtr++ = (a < b ? a : b);
302  }
303 }
304 #endif /* LV_HAVE_AVX */
305 
306 
307 #endif /* INCLUDED_volk_64f_x2_min_64f_u_H */
volk_64f_x2_min_64f_a_sse2
static void volk_64f_x2_min_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:161
volk_64f_x2_min_64f_generic
static void volk_64f_x2_min_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:200
volk_64f_x2_min_64f_a_avx
static void volk_64f_x2_min_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:121
volk_64f_x2_min_64f_u_avx
static void volk_64f_x2_min_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:270