Vector Optimized Library of Kernels (VOLK) 2.2
Architecture-tuned implementations of math kernels
volk_32fc_index_max_16u.h — index of the maximum-magnitude complex sample, returned as uint16_t
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
76 #ifndef INCLUDED_volk_32fc_index_max_16u_a_H
77 #define INCLUDED_volk_32fc_index_max_16u_a_H
78 
79 #include <volk/volk_common.h>
80 #include <inttypes.h>
81 #include <stdio.h>
82 #include <limits.h>
83 #include <volk/volk_complex.h>
84 
85 #ifdef LV_HAVE_AVX2
86 #include <immintrin.h>
87 
/* Find the index of the complex sample with the largest squared magnitude
 * (re*re + im*im) in a 32-byte-aligned input vector.  AVX2 implementation;
 * processes 8 complex samples per main-loop iteration.
 *
 *  target     - output: receives the 16-bit index of the max-magnitude sample
 *  src0       - 32-byte-aligned pointer to the complex input vector
 *  num_points - number of complex samples to examine (clamped to USHRT_MAX
 *               so the winning index fits in a uint16_t)
 */
static inline void
volk_32fc_index_max_16u_a_avx2(uint16_t* target, lv_32fc_t* src0,
                               uint32_t num_points)
{
  /* Clamp so the returned index always fits in the uint16_t result. */
  num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
  // Branchless version, if we think it'll make a difference
  //num_points = USHRT_MAX ^ ((num_points ^ USHRT_MAX) & -(num_points < USHRT_MAX));

  const uint32_t num_bytes = num_points*8; /* each lv_32fc_t is 8 bytes */

  union bit256 holderf;  /* readback buffer for the 8 per-lane maxima   */
  union bit256 holderi;  /* readback buffer for the 8 candidate indices */
  float sq_dist = 0.0;

  union bit256 xmm5, xmm4;   /* compare masks: EQ (new winner) / LT (old winner) */
  __m256 xmm1, xmm2, xmm3;   /* xmm3 accumulates the per-lane running max        */
  __m256i xmm8, xmm11, xmm12, xmmfive, xmmfour, xmm9, holder0, holder1, xmm10;

  xmm5.int_vec = xmmfive = _mm256_setzero_si256();
  xmm4.int_vec = xmmfour = _mm256_setzero_si256();
  holderf.int_vec = holder0 = _mm256_setzero_si256();
  holderi.int_vec = holder1 = _mm256_setzero_si256();

  int bound = num_bytes >> 6;  /* full iterations of 8 complex samples (64 B) */
  int i = 0;

  xmm8 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); /* indices of the current batch */
  xmm9 = _mm256_setzero_si256(); //=xmm8           /* per-lane argmax so far       */
  xmm10 = _mm256_set1_epi32(8);                    /* index step per iteration     */
  xmm3 = _mm256_setzero_ps();                      /* per-lane max of |z|^2        */

  /* hadd_ps pairs within each 128-bit half; this permute restores sample order. */
  __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
  for(; i < bound; ++i) {
    xmm1 = _mm256_load_ps((float*)src0);
    xmm2 = _mm256_load_ps((float*)&src0[4]);

    src0 += 8;

    xmm1 = _mm256_mul_ps(xmm1, xmm1);  /* square the re/im components */
    xmm2 = _mm256_mul_ps(xmm2, xmm2);

    xmm1 = _mm256_hadd_ps(xmm1, xmm2); /* re^2 + im^2 for 8 samples */
    xmm1 = _mm256_permutevar8x32_ps(xmm1, idx);

    xmm3 = _mm256_max_ps(xmm1, xmm3);

    /* Take the new batch index where this batch produced the max (EQ with the
       updated running max), keep the old argmax where it lost (LT). */
    xmm4.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_LT_OS);
    xmm5.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_EQ_OQ);

    xmm11 = _mm256_and_si256(xmm8, xmm5.int_vec);
    xmm12 = _mm256_and_si256(xmm9, xmm4.int_vec);

    xmm9 = _mm256_add_epi32(xmm11, xmm12);

    xmm8 = _mm256_add_epi32(xmm8, xmm10);
  }
  /* Tail: 4 leftover complex samples (exactly one full 256-bit load). */
  xmm10 = _mm256_set1_epi32(4);
  if (num_bytes >> 5 & 1) {
    xmm1 = _mm256_load_ps((float*)src0);

    src0 += 4;

    xmm1 = _mm256_mul_ps(xmm1, xmm1);

    xmm1 = _mm256_hadd_ps(xmm1, xmm1);
    xmm1 = _mm256_permutevar8x32_ps(xmm1, idx);

    xmm3 = _mm256_max_ps(xmm1, xmm3);

    xmm4.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_LT_OS);
    xmm5.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_EQ_OQ);

    xmm11 = _mm256_and_si256(xmm8, xmm5.int_vec);
    xmm12 = _mm256_and_si256(xmm9, xmm4.int_vec);

    xmm9 = _mm256_add_epi32(xmm11, xmm12);

    xmm8 = _mm256_add_epi32(xmm8, xmm10);
  }

  /* Tail: 2 leftover complex samples.
     NOTE(review): _mm256_load_ps reads 32 bytes here although only 16 bytes of
     input remain — a possible over-read past the end of the buffer; confirm
     VOLK's allocation padding guarantees cover this. */
  idx = _mm256_set_epi32(1,0,1,0,1,0,1,0);
  xmm10 = _mm256_set1_epi32(2);
  if (num_bytes >> 4 & 1) {
    xmm2 = _mm256_load_ps((float*)src0);

    xmm1 = _mm256_permutevar8x32_ps(bit256_p(&xmm8)->float_vec, idx);
    xmm8 = bit256_p(&xmm1)->int_vec;

    xmm2 = _mm256_mul_ps(xmm2, xmm2);

    src0 += 2;

    xmm1 = _mm256_hadd_ps(xmm2, xmm2);

    xmm3 = _mm256_max_ps(xmm1, xmm3);

    xmm4.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_LT_OS);
    xmm5.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_EQ_OQ);

    xmm11 = _mm256_and_si256(xmm8, xmm5.int_vec);
    xmm12 = _mm256_and_si256(xmm9, xmm4.int_vec);

    xmm9 = _mm256_add_epi32(xmm11, xmm12);

    xmm8 = _mm256_add_epi32(xmm8, xmm10);
  }

  /* NOTE(review): there is no tail for a single leftover sample
     (num_bytes >> 3 & 1); the disabled block below suggests one was intended,
     so the final point appears to be skipped when num_points is odd — verify
     against the generic implementation. */
  /*
  idx = _mm256_setzero_si256();
  for(i = 0; i < leftovers2; ++i) {
  //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], ((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);

  sq_dist = lv_creal(src0[0]) * lv_creal(src0[0]) + lv_cimag(src0[0]) * lv_cimag(src0[0]);

  //xmm = _mm_load1_ps(&sq_dist);//insert?
  xmm2 = _mm256_set1_ps(sq_dist);
  //xmm2 = _mm256_insertf128_ps(xmm2, xmm, 0);

  xmm1 = xmm3;

  xmm3 = _mm256_max_ps(xmm3, xmm2);//only lowest 32bit value
  xmm3 = _mm256_permutevar8x32_ps(xmm3, idx);

  xmm4.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_LT_OS);
  xmm5.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_EQ_OQ);

  xmm8 = _mm256_permutevar8x32_epi32(xmm8, idx);

  xmm11 = _mm256_and_si256(xmm8, xmm4.int_vec);
  xmm12 = _mm256_and_si256(xmm9, xmm5.int_vec);

  xmm9 = _mm256_add_epi32(xmm11, xmm12);
  }*/

  /* Horizontal reduction: scan the 8 lanes for the overall winner. */
  _mm256_store_ps((float*)&(holderf.f), xmm3);
  _mm256_store_si256(&(holderi.int_vec), xmm9);

  target[0] = holderi.i[0];
  sq_dist = holderf.f[0];
  target[0] = (holderf.f[1] > sq_dist) ? holderi.i[1] : target[0];
  sq_dist = (holderf.f[1] > sq_dist) ? holderf.f[1] : sq_dist;
  target[0] = (holderf.f[2] > sq_dist) ? holderi.i[2] : target[0];
  sq_dist = (holderf.f[2] > sq_dist) ? holderf.f[2] : sq_dist;
  target[0] = (holderf.f[3] > sq_dist) ? holderi.i[3] : target[0];
  sq_dist = (holderf.f[3] > sq_dist) ? holderf.f[3] : sq_dist;
  target[0] = (holderf.f[4] > sq_dist) ? holderi.i[4] : target[0];
  sq_dist = (holderf.f[4] > sq_dist) ? holderf.f[4] : sq_dist;
  target[0] = (holderf.f[5] > sq_dist) ? holderi.i[5] : target[0];
  sq_dist = (holderf.f[5] > sq_dist) ? holderf.f[5] : sq_dist;
  target[0] = (holderf.f[6] > sq_dist) ? holderi.i[6] : target[0];
  sq_dist = (holderf.f[6] > sq_dist) ? holderf.f[6] : sq_dist;
  target[0] = (holderf.f[7] > sq_dist) ? holderi.i[7] : target[0];
  sq_dist = (holderf.f[7] > sq_dist) ? holderf.f[7] : sq_dist;

}
243 
244 #endif /*LV_HAVE_AVX2*/
245 
246 #ifdef LV_HAVE_SSE3
247 #include <xmmintrin.h>
248 #include <pmmintrin.h>
249 
250 static inline void
252  uint32_t num_points)
253 {
254  num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
255  // Branchless version, if we think it'll make a difference
256  //num_points = USHRT_MAX ^ ((num_points ^ USHRT_MAX) & -(num_points < USHRT_MAX));
257 
258  const uint32_t num_bytes = num_points*8;
259 
260  union bit128 holderf;
261  union bit128 holderi;
262  float sq_dist = 0.0;
263 
264  union bit128 xmm5, xmm4;
265  __m128 xmm1, xmm2, xmm3;
266  __m128i xmm8, xmm11, xmm12, xmmfive, xmmfour, xmm9, holder0, holder1, xmm10;
267 
268  xmm5.int_vec = xmmfive = _mm_setzero_si128();
269  xmm4.int_vec = xmmfour = _mm_setzero_si128();
270  holderf.int_vec = holder0 = _mm_setzero_si128();
271  holderi.int_vec = holder1 = _mm_setzero_si128();
272 
273  int bound = num_bytes >> 5;
274  int i = 0;
275 
276  xmm8 = _mm_set_epi32(3, 2, 1, 0);//remember the crazy reverse order!
277  xmm9 = _mm_setzero_si128();
278  xmm10 = _mm_set_epi32(4, 4, 4, 4);
279  xmm3 = _mm_setzero_ps();
280  //printf("%f, %f, %f, %f\n", ((float*)&xmm10)[0], ((float*)&xmm10)[1], ((float*)&xmm10)[2], ((float*)&xmm10)[3]);
281 
282  for(; i < bound; ++i) {
283  xmm1 = _mm_load_ps((float*)src0);
284  xmm2 = _mm_load_ps((float*)&src0[2]);
285 
286  src0 += 4;
287 
288  xmm1 = _mm_mul_ps(xmm1, xmm1);
289  xmm2 = _mm_mul_ps(xmm2, xmm2);
290 
291  xmm1 = _mm_hadd_ps(xmm1, xmm2);
292 
293  xmm3 = _mm_max_ps(xmm1, xmm3);
294 
295  xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
296  xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
297 
298  xmm11 = _mm_and_si128(xmm8, xmm5.int_vec);
299  xmm12 = _mm_and_si128(xmm9, xmm4.int_vec);
300 
301  xmm9 = _mm_add_epi32(xmm11, xmm12);
302 
303  xmm8 = _mm_add_epi32(xmm8, xmm10);
304 
305  //printf("%f, %f, %f, %f\n", ((float*)&xmm3)[0], ((float*)&xmm3)[1], ((float*)&xmm3)[2], ((float*)&xmm3)[3]);
306  //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm10)[0], ((uint32_t*)&xmm10)[1], ((uint32_t*)&xmm10)[2], ((uint32_t*)&xmm10)[3]);
307  }
308 
309 
310  if (num_bytes >> 4 & 1) {
311  xmm2 = _mm_load_ps((float*)src0);
312 
313  xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, bit128_p(&xmm8)->float_vec);
314  xmm8 = bit128_p(&xmm1)->int_vec;
315 
316  xmm2 = _mm_mul_ps(xmm2, xmm2);
317 
318  src0 += 2;
319 
320  xmm1 = _mm_hadd_ps(xmm2, xmm2);
321 
322  xmm3 = _mm_max_ps(xmm1, xmm3);
323 
324  xmm10 = _mm_set_epi32(2, 2, 2, 2);//load1_ps((float*)&init[2]);
325 
326  xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
327  xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
328 
329  xmm11 = _mm_and_si128(xmm8, xmm5.int_vec);
330  xmm12 = _mm_and_si128(xmm9, xmm4.int_vec);
331 
332  xmm9 = _mm_add_epi32(xmm11, xmm12);
333 
334  xmm8 = _mm_add_epi32(xmm8, xmm10);
335  //printf("egads%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], ((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
336  }
337 
338  if (num_bytes >> 3 & 1) {
339  //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], ((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
340 
341  sq_dist = lv_creal(src0[0]) * lv_creal(src0[0]) + lv_cimag(src0[0]) * lv_cimag(src0[0]);
342 
343  xmm2 = _mm_load1_ps(&sq_dist);
344 
345  xmm1 = xmm3;
346 
347  xmm3 = _mm_max_ss(xmm3, xmm2);
348 
349  xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
350  xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
351 
352  xmm8 = _mm_shuffle_epi32(xmm8, 0x00);
353 
354  xmm11 = _mm_and_si128(xmm8, xmm4.int_vec);
355  xmm12 = _mm_and_si128(xmm9, xmm5.int_vec);
356 
357  xmm9 = _mm_add_epi32(xmm11, xmm12);
358  }
359 
360  //printf("%f, %f, %f, %f\n", ((float*)&xmm3)[0], ((float*)&xmm3)[1], ((float*)&xmm3)[2], ((float*)&xmm3)[3]);
361  //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], ((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
362 
363  _mm_store_ps((float*)&(holderf.f), xmm3);
364  _mm_store_si128(&(holderi.int_vec), xmm9);
365 
366  target[0] = holderi.i[0];
367  sq_dist = holderf.f[0];
368  target[0] = (holderf.f[1] > sq_dist) ? holderi.i[1] : target[0];
369  sq_dist = (holderf.f[1] > sq_dist) ? holderf.f[1] : sq_dist;
370  target[0] = (holderf.f[2] > sq_dist) ? holderi.i[2] : target[0];
371  sq_dist = (holderf.f[2] > sq_dist) ? holderf.f[2] : sq_dist;
372  target[0] = (holderf.f[3] > sq_dist) ? holderi.i[3] : target[0];
373  sq_dist = (holderf.f[3] > sq_dist) ? holderf.f[3] : sq_dist;
374 
375  /*
376  float placeholder = 0.0;
377  uint32_t temp0, temp1;
378  uint32_t g0 = (((float*)&xmm3)[0] > ((float*)&xmm3)[1]);
379  uint32_t l0 = g0 ^ 1;
380 
381  uint32_t g1 = (((float*)&xmm3)[1] > ((float*)&xmm3)[2]);
382  uint32_t l1 = g1 ^ 1;
383 
384  temp0 = g0 * ((uint32_t*)&xmm9)[0] + l0 * ((uint32_t*)&xmm9)[1];
385  temp1 = g0 * ((uint32_t*)&xmm9)[2] + l0 * ((uint32_t*)&xmm9)[3];
386  sq_dist = g0 * ((float*)&xmm3)[0] + l0 * ((float*)&xmm3)[1];
387  placeholder = g0 * ((float*)&xmm3)[2] + l0 * ((float*)&xmm3)[3];
388 
389  g0 = (sq_dist > placeholder);
390  l0 = g0 ^ 1;
391  target[0] = g0 * temp0 + l0 * temp1;
392  */
393 }
394 
395 #endif /*LV_HAVE_SSE3*/
396 
397 #ifdef LV_HAVE_GENERIC
398 static inline void
400  uint32_t num_points)
401 {
402  num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
403 
404  const uint32_t num_bytes = num_points*8;
405 
406  float sq_dist = 0.0;
407  float max = 0.0;
408  uint16_t index = 0;
409 
410  uint32_t i = 0;
411 
412  for(; i < num_bytes >> 3; ++i) {
413  sq_dist = lv_creal(src0[i]) * lv_creal(src0[i]) + lv_cimag(src0[i]) * lv_cimag(src0[i]);
414 
415  index = sq_dist > max ? i : index;
416  max = sq_dist > max ? sq_dist : max;
417  }
418  target[0] = index;
419 }
420 
421 #endif /*LV_HAVE_GENERIC*/
422 
423 
424 #endif /*INCLUDED_volk_32fc_index_max_16u_a_H*/
425 
426 
427 #ifndef INCLUDED_volk_32fc_index_max_16u_u_H
428 #define INCLUDED_volk_32fc_index_max_16u_u_H
429 
430 #include <volk/volk_common.h>
431 #include <inttypes.h>
432 #include <stdio.h>
433 #include <limits.h>
434 #include <volk/volk_complex.h>
435 
436 #ifdef LV_HAVE_AVX2
437 #include <immintrin.h>
438 
/* Find the index of the complex sample with the largest squared magnitude
 * (re*re + im*im) in an input vector with no alignment requirement (unaligned
 * loads).  AVX2 implementation; processes 8 complex samples per iteration.
 *
 *  target     - output: receives the 16-bit index of the max-magnitude sample
 *  src0       - pointer to the complex input vector (any alignment)
 *  num_points - number of complex samples to examine (clamped to USHRT_MAX
 *               so the winning index fits in a uint16_t)
 */
static inline void
volk_32fc_index_max_16u_u_avx2(uint16_t* target, lv_32fc_t* src0,
                               uint32_t num_points)
{
  /* Clamp so the returned index always fits in the uint16_t result. */
  num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
  // Branchless version, if we think it'll make a difference
  //num_points = USHRT_MAX ^ ((num_points ^ USHRT_MAX) & -(num_points < USHRT_MAX));

  const uint32_t num_bytes = num_points*8; /* each lv_32fc_t is 8 bytes */

  union bit256 holderf;  /* readback buffer for the 8 per-lane maxima   */
  union bit256 holderi;  /* readback buffer for the 8 candidate indices */
  float sq_dist = 0.0;

  union bit256 xmm5, xmm4;   /* compare masks: EQ (new winner) / LT (old winner) */
  __m256 xmm1, xmm2, xmm3;   /* xmm3 accumulates the per-lane running max        */
  __m256i xmm8, xmm11, xmm12, xmmfive, xmmfour, xmm9, holder0, holder1, xmm10;

  xmm5.int_vec = xmmfive = _mm256_setzero_si256();
  xmm4.int_vec = xmmfour = _mm256_setzero_si256();
  holderf.int_vec = holder0 = _mm256_setzero_si256();
  holderi.int_vec = holder1 = _mm256_setzero_si256();

  int bound = num_bytes >> 6;  /* full iterations of 8 complex samples (64 B) */
  int i = 0;

  xmm8 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); /* indices of the current batch */
  xmm9 = _mm256_setzero_si256(); //=xmm8           /* per-lane argmax so far       */
  xmm10 = _mm256_set1_epi32(8);                    /* index step per iteration     */
  xmm3 = _mm256_setzero_ps();                      /* per-lane max of |z|^2        */

  /* hadd_ps pairs within each 128-bit half; this permute restores sample order. */
  __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
  for(; i < bound; ++i) {
    xmm1 = _mm256_loadu_ps((float*)src0);
    xmm2 = _mm256_loadu_ps((float*)&src0[4]);

    src0 += 8;

    xmm1 = _mm256_mul_ps(xmm1, xmm1);  /* square the re/im components */
    xmm2 = _mm256_mul_ps(xmm2, xmm2);

    xmm1 = _mm256_hadd_ps(xmm1, xmm2); /* re^2 + im^2 for 8 samples */
    xmm1 = _mm256_permutevar8x32_ps(xmm1, idx);

    xmm3 = _mm256_max_ps(xmm1, xmm3);

    /* Take the new batch index where this batch produced the max (EQ with the
       updated running max), keep the old argmax where it lost (LT). */
    xmm4.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_LT_OS);
    xmm5.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_EQ_OQ);

    xmm11 = _mm256_and_si256(xmm8, xmm5.int_vec);
    xmm12 = _mm256_and_si256(xmm9, xmm4.int_vec);

    xmm9 = _mm256_add_epi32(xmm11, xmm12);

    xmm8 = _mm256_add_epi32(xmm8, xmm10);
  }
  /* Tail: 4 leftover complex samples (exactly one full 256-bit load). */
  xmm10 = _mm256_set1_epi32(4);
  if (num_bytes >> 5 & 1) {
    xmm1 = _mm256_loadu_ps((float*)src0);

    src0 += 4;

    xmm1 = _mm256_mul_ps(xmm1, xmm1);

    xmm1 = _mm256_hadd_ps(xmm1, xmm1);
    xmm1 = _mm256_permutevar8x32_ps(xmm1, idx);

    xmm3 = _mm256_max_ps(xmm1, xmm3);

    xmm4.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_LT_OS);
    xmm5.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_EQ_OQ);

    xmm11 = _mm256_and_si256(xmm8, xmm5.int_vec);
    xmm12 = _mm256_and_si256(xmm9, xmm4.int_vec);

    xmm9 = _mm256_add_epi32(xmm11, xmm12);

    xmm8 = _mm256_add_epi32(xmm8, xmm10);
  }

  /* Tail: 2 leftover complex samples.
     NOTE(review): _mm256_loadu_ps reads 32 bytes here although only 16 bytes
     of input remain — a possible over-read past the end of the buffer;
     confirm the allocation padding guarantees upstream.  Also note there is
     no tail for a single leftover sample (num_bytes >> 3 & 1), so an odd
     final point appears to be skipped — verify against the generic kernel. */
  idx = _mm256_set_epi32(1,0,1,0,1,0,1,0);
  xmm10 = _mm256_set1_epi32(2);
  if (num_bytes >> 4 & 1) {
    xmm2 = _mm256_loadu_ps((float*)src0);

    xmm1 = _mm256_permutevar8x32_ps(bit256_p(&xmm8)->float_vec, idx);
    xmm8 = bit256_p(&xmm1)->int_vec;

    xmm2 = _mm256_mul_ps(xmm2, xmm2);

    src0 += 2;

    xmm1 = _mm256_hadd_ps(xmm2, xmm2);

    xmm3 = _mm256_max_ps(xmm1, xmm3);

    xmm4.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_LT_OS);
    xmm5.float_vec = _mm256_cmp_ps(xmm1, xmm3, _CMP_EQ_OQ);

    xmm11 = _mm256_and_si256(xmm8, xmm5.int_vec);
    xmm12 = _mm256_and_si256(xmm9, xmm4.int_vec);

    xmm9 = _mm256_add_epi32(xmm11, xmm12);

    xmm8 = _mm256_add_epi32(xmm8, xmm10);
  }

  /* Horizontal reduction: scan the 8 lanes for the overall winner. */
  _mm256_storeu_ps((float*)&(holderf.f), xmm3);
  _mm256_storeu_si256(&(holderi.int_vec), xmm9);

  target[0] = holderi.i[0];
  sq_dist = holderf.f[0];
  target[0] = (holderf.f[1] > sq_dist) ? holderi.i[1] : target[0];
  sq_dist = (holderf.f[1] > sq_dist) ? holderf.f[1] : sq_dist;
  target[0] = (holderf.f[2] > sq_dist) ? holderi.i[2] : target[0];
  sq_dist = (holderf.f[2] > sq_dist) ? holderf.f[2] : sq_dist;
  target[0] = (holderf.f[3] > sq_dist) ? holderi.i[3] : target[0];
  sq_dist = (holderf.f[3] > sq_dist) ? holderf.f[3] : sq_dist;
  target[0] = (holderf.f[4] > sq_dist) ? holderi.i[4] : target[0];
  sq_dist = (holderf.f[4] > sq_dist) ? holderf.f[4] : sq_dist;
  target[0] = (holderf.f[5] > sq_dist) ? holderi.i[5] : target[0];
  sq_dist = (holderf.f[5] > sq_dist) ? holderf.f[5] : sq_dist;
  target[0] = (holderf.f[6] > sq_dist) ? holderi.i[6] : target[0];
  sq_dist = (holderf.f[6] > sq_dist) ? holderf.f[6] : sq_dist;
  target[0] = (holderf.f[7] > sq_dist) ? holderi.i[7] : target[0];
  sq_dist = (holderf.f[7] > sq_dist) ? holderf.f[7] : sq_dist;

}
567 
568 #endif /*LV_HAVE_AVX2*/
569 
570 #endif /*INCLUDED_volk_32fc_index_max_16u_u_H*/
lv_cimag
#define lv_cimag(x)
Definition: volk_complex.h:85
bit256::int_vec
__m256i int_vec
Definition: volk_common.h:126
bit128_p
#define bit128_p(x)
Definition: volk_common.h:131
bit256
Definition: volk_common.h:117
bit256::f
float f[8]
Definition: volk_common.h:121
bit256_p
#define bit256_p(x)
Definition: volk_common.h:132
i
for i
Definition: volk_config_fixed.tmpl.h:25
volk_common.h
bit256::i
uint32_t i[8]
Definition: volk_common.h:120
bit128::int_vec
__m128i int_vec
Definition: volk_common.h:112
volk_32fc_index_max_16u_a_sse3
static void volk_32fc_index_max_16u_a_sse3(uint16_t *target, lv_32fc_t *src0, uint32_t num_points)
Definition: volk_32fc_index_max_16u.h:251
bit128::i
uint32_t i[4]
Definition: volk_common.h:103
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:61
volk_complex.h
bit128::f
float f[4]
Definition: volk_common.h:104
bit256::float_vec
__m256 float_vec
Definition: volk_common.h:125
volk_32fc_index_max_16u_generic
static void volk_32fc_index_max_16u_generic(uint16_t *target, lv_32fc_t *src0, uint32_t num_points)
Definition: volk_32fc_index_max_16u.h:399
bit128
Definition: volk_common.h:100
lv_creal
#define lv_creal(x)
Definition: volk_complex.h:83
bit128::float_vec
__m128 float_vec
Definition: volk_common.h:108