Vector Optimized Library of Kernels  2.1
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_real_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
72 #define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 
77 #ifdef LV_HAVE_AVX2
78 #include <immintrin.h>
79 
80 static inline void volk_32fc_deinterleave_real_64f_a_avx2(
81  double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points) {
82  unsigned int number = 0;
83 
84  const float *complexVectorPtr = (float *)complexVector;
85  double *iBufferPtr = iBuffer;
86 
87  const unsigned int quarterPoints = num_points / 4;
88  __m256 cplxValue;
89  __m128 fVal;
90  __m256d dVal;
91  __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
92  for (; number < quarterPoints; number++) {
93 
94  cplxValue = _mm256_load_ps(complexVectorPtr);
95  complexVectorPtr += 8;
96 
97  // Arrange in i1i2i1i2 format
98  cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
99  fVal = _mm256_extractf128_ps(cplxValue, 0);
100  dVal = _mm256_cvtps_pd(fVal);
101  _mm256_store_pd(iBufferPtr, dVal);
102 
103  iBufferPtr += 4;
104  }
105 
106  number = quarterPoints * 4;
107  for (; number < num_points; number++) {
108  *iBufferPtr++ = (double)*complexVectorPtr++;
109  complexVectorPtr++;
110  }
111 }
112 #endif /* LV_HAVE_AVX2 */
113 
114 #ifdef LV_HAVE_SSE2
115 #include <emmintrin.h>
116 
118  double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points) {
119  unsigned int number = 0;
120 
121  const float *complexVectorPtr = (float *)complexVector;
122  double *iBufferPtr = iBuffer;
123 
124  const unsigned int halfPoints = num_points / 2;
125  __m128 cplxValue, fVal;
126  __m128d dVal;
127  for (; number < halfPoints; number++) {
128 
129  cplxValue = _mm_load_ps(complexVectorPtr);
130  complexVectorPtr += 4;
131 
132  // Arrange in i1i2i1i2 format
133  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
134  dVal = _mm_cvtps_pd(fVal);
135  _mm_store_pd(iBufferPtr, dVal);
136 
137  iBufferPtr += 2;
138  }
139 
140  number = halfPoints * 2;
141  for (; number < num_points; number++) {
142  *iBufferPtr++ = (double)*complexVectorPtr++;
143  complexVectorPtr++;
144  }
145 }
146 #endif /* LV_HAVE_SSE2 */
147 
148 #ifdef LV_HAVE_GENERIC
149 
151  double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points) {
152  unsigned int number = 0;
153  const float *complexVectorPtr = (float *)complexVector;
154  double *iBufferPtr = iBuffer;
155  for (number = 0; number < num_points; number++) {
156  *iBufferPtr++ = (double)*complexVectorPtr++;
157  complexVectorPtr++;
158  }
159 }
160 #endif /* LV_HAVE_GENERIC */
161 
162 #ifdef LV_HAVE_NEONV8
163 #include <arm_neon.h>
164 
/*!
 * \brief Copies the real part of every complex float sample into a buffer of
 *        doubles (ARMv8 NEON).
 *
 * \param iBuffer       Destination; receives one double per complex sample.
 * \param complexVector Source samples, read as consecutive (real, imag) floats.
 * \param num_points    Number of complex samples to convert.
 */
static inline void volk_32fc_deinterleave_real_64f_neon(
    double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points) {
    const float *in = (const float *)complexVector;
    double *out = iBuffer;
    unsigned int blocks = num_points / 4;
    unsigned int leftover = num_points % 4;

    /* Vector path: four complex samples (eight floats) per iteration. */
    for (; blocks != 0; --blocks) {
        /* The 4-way de-interleaving load puts floats {0, 4} in val[0] and
         * floats {2, 6} in val[2], i.e. {re0, re2} and {re1, re3}. */
        const float32x2x4_t lanes = vld4_f32(in);
        float64x2x2_t widened;

        widened.val[0] = vcvt_f64_f32(lanes.val[0]);
        widened.val[1] = vcvt_f64_f32(lanes.val[2]);

        /* The 2-way interleaving store writes re0, re1, re2, re3 in order. */
        vst2q_f64(out, widened);

        out += 4;
        in += 8;
    }

    /* Scalar tail: keep the real float, step over the imaginary one. */
    for (; leftover != 0; --leftover) {
        *out++ = (double)in[0];
        in += 2;
    }
}
199 #endif /* LV_HAVE_NEONV8 */
200 
201 #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
202 
203 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
204 #define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
205 
206 #include <inttypes.h>
207 #include <stdio.h>
208 
209 #ifdef LV_HAVE_AVX2
210 #include <immintrin.h>
211 
212 static inline void volk_32fc_deinterleave_real_64f_u_avx2(
213  double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points) {
214  unsigned int number = 0;
215 
216  const float *complexVectorPtr = (float *)complexVector;
217  double *iBufferPtr = iBuffer;
218 
219  const unsigned int quarterPoints = num_points / 4;
220  __m256 cplxValue;
221  __m128 fVal;
222  __m256d dVal;
223  __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
224  for (; number < quarterPoints; number++) {
225 
226  cplxValue = _mm256_loadu_ps(complexVectorPtr);
227  complexVectorPtr += 8;
228 
229  // Arrange in i1i2i1i2 format
230  cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
231  fVal = _mm256_extractf128_ps(cplxValue, 0);
232  dVal = _mm256_cvtps_pd(fVal);
233  _mm256_storeu_pd(iBufferPtr, dVal);
234 
235  iBufferPtr += 4;
236  }
237 
238  number = quarterPoints * 4;
239  for (; number < num_points; number++) {
240  *iBufferPtr++ = (double)*complexVectorPtr++;
241  complexVectorPtr++;
242  }
243 }
244 #endif /* LV_HAVE_AVX2 */
245 
246 #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_u_H */
volk_32fc_deinterleave_real_64f_generic
static void volk_32fc_deinterleave_real_64f_generic(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:150
volk_32fc_deinterleave_real_64f_a_sse2
static void volk_32fc_deinterleave_real_64f_a_sse2(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:117
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:61