73 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
74 #define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
80 #include <immintrin.h>
85 unsigned int num_points) {
/*
 * Splits interleaved float pairs read through complexVectorPtr into two
 * double streams: the first float of every pair is widened and written via
 * iBufferPtr, the second via qBufferPtr. Unaligned 256-bit loads/stores are
 * used, so no alignment is demanded of the buffers by these intrinsics.
 *
 * NOTE(review): the start of the signature, the declarations of cplxValue
 * and dVal, any advance of iBufferPtr/qBufferPtr inside the vector loop and
 * the closing braces are not among the visible lines -- confirm against the
 * complete file.
 */
86 unsigned int number = 0;
/* Treat the complex input as a flat float array (two floats per point). */
88 const float *complexVectorPtr = (
float *)complexVector;
89 double *iBufferPtr = iBuffer;
90 double *qBufferPtr = qBuffer;
/* Number of full vector iterations; each one consumes 4 points (8 floats). */
92 const unsigned int quarterPoints = num_points / 4;
94 __m128 complexH, complexL, fVal;
97 for (; number < quarterPoints; number++) {
/* Unaligned load of 8 floats = 4 interleaved pairs. */
99 cplxValue = _mm256_loadu_ps(complexVectorPtr);
100 complexVectorPtr += 8;
/* Split into upper (floats 4..7) and lower (floats 0..3) 128-bit halves. */
102 complexH = _mm256_extractf128_ps(cplxValue, 1);
103 complexL = _mm256_extractf128_ps(cplxValue, 0);
/* Pick elements 0 and 2 of each half: the first float of all 4 pairs. */
106 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
/* Widen 4 floats to 4 doubles and store them unaligned. */
107 dVal = _mm256_cvtps_pd(fVal);
108 _mm256_storeu_pd(iBufferPtr, dVal);
/* Pick elements 1 and 3 of each half: the second float of all 4 pairs. */
111 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
112 dVal = _mm256_cvtps_pd(fVal);
113 _mm256_storeu_pd(qBufferPtr, dVal);
/* Scalar tail: handle the num_points % 4 points the vector loop left over. */
119 number = quarterPoints * 4;
120 for (; number < num_points; number++) {
/* float -> double conversion happens implicitly on assignment. */
121 *iBufferPtr++ = *complexVectorPtr++;
122 *qBufferPtr++ = *complexVectorPtr++;
128 #include <emmintrin.h>
133 unsigned int num_points) {
/*
 * Splits interleaved float pairs read through complexVectorPtr into two
 * double streams (first float of a pair -> iBufferPtr, second ->
 * qBufferPtr) using unaligned 128-bit loads/stores, 2 points per iteration.
 *
 * NOTE(review): the start of the signature, the declaration of dVal, any
 * advance of iBufferPtr/qBufferPtr inside the vector loop and the closing
 * braces are not among the visible lines -- confirm against the complete
 * file.
 */
134 unsigned int number = 0;
/* Treat the complex input as a flat float array (two floats per point). */
136 const float *complexVectorPtr = (
float *)complexVector;
137 double *iBufferPtr = iBuffer;
138 double *qBufferPtr = qBuffer;
/* Number of full vector iterations; each one consumes 2 points (4 floats). */
140 const unsigned int halfPoints = num_points / 2;
141 __m128 cplxValue, fVal;
144 for (; number < halfPoints; number++) {
/* Unaligned load of 4 floats = 2 interleaved pairs. */
146 cplxValue = _mm_loadu_ps(complexVectorPtr);
147 complexVectorPtr += 4;
/*
 * Move elements 0 and 2 (first float of each pair) into the low two lanes;
 * _mm_cvtps_pd only converts the low two floats, so the duplicated upper
 * lanes are ignored.
 */
150 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
151 dVal = _mm_cvtps_pd(fVal);
152 _mm_storeu_pd(iBufferPtr, dVal);
/* Same for elements 1 and 3 (second float of each pair). */
155 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
156 dVal = _mm_cvtps_pd(fVal);
157 _mm_storeu_pd(qBufferPtr, dVal);
/* Scalar tail: handle the single leftover point when num_points is odd. */
163 number = halfPoints * 2;
164 for (; number < num_points; number++) {
/* float -> double conversion happens implicitly on assignment. */
165 *iBufferPtr++ = *complexVectorPtr++;
166 *qBufferPtr++ = *complexVectorPtr++;
171 #ifdef LV_HAVE_GENERIC
176 unsigned int num_points) {
/*
 * Portable scalar version: for each of num_points points, reads two
 * consecutive floats from the input and writes the first, widened to
 * double, to iBuffer and the second to qBuffer.
 *
 * NOTE(review): the start of the signature and the closing braces are not
 * among the visible lines.
 */
177 unsigned int number = 0;
/* Treat the complex input as a flat float array (two floats per point). */
178 const float *complexVectorPtr = (
float *)complexVector;
179 double *iBufferPtr = iBuffer;
180 double *qBufferPtr = qBuffer;
/* One point per iteration; the input pointer advances by two floats. */
182 for (number = 0; number < num_points; number++) {
183 *iBufferPtr++ = (double)*complexVectorPtr++;
184 *qBufferPtr++ = (double)*complexVectorPtr++;
190 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
191 #define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
193 #include <inttypes.h>
197 #include <immintrin.h>
202 unsigned int num_points) {
/*
 * Aligned counterpart of the 256-bit deinterleave kernel: splits interleaved
 * float pairs into two double streams (first float of a pair -> iBufferPtr,
 * second -> qBufferPtr), 4 points per vector iteration.
 *
 * _mm256_load_ps and _mm256_store_pd require 32-byte aligned addresses, so
 * the input and both output buffers must be 32-byte aligned.
 *
 * NOTE(review): the start of the signature, the declarations of cplxValue
 * and dVal, any advance of iBufferPtr/qBufferPtr inside the vector loop and
 * the closing braces are not among the visible lines -- confirm against the
 * complete file.
 */
203 unsigned int number = 0;
/* Treat the complex input as a flat float array (two floats per point). */
205 const float *complexVectorPtr = (
float *)complexVector;
206 double *iBufferPtr = iBuffer;
207 double *qBufferPtr = qBuffer;
/* Number of full vector iterations; each one consumes 4 points (8 floats). */
209 const unsigned int quarterPoints = num_points / 4;
211 __m128 complexH, complexL, fVal;
214 for (; number < quarterPoints; number++) {
/* Aligned load of 8 floats = 4 interleaved pairs. */
216 cplxValue = _mm256_load_ps(complexVectorPtr);
217 complexVectorPtr += 8;
/* Split into upper (floats 4..7) and lower (floats 0..3) 128-bit halves. */
219 complexH = _mm256_extractf128_ps(cplxValue, 1);
220 complexL = _mm256_extractf128_ps(cplxValue, 0);
/* Pick elements 0 and 2 of each half: the first float of all 4 pairs. */
223 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
/* Widen 4 floats to 4 doubles and store them (aligned). */
224 dVal = _mm256_cvtps_pd(fVal);
225 _mm256_store_pd(iBufferPtr, dVal);
/* Pick elements 1 and 3 of each half: the second float of all 4 pairs. */
228 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
229 dVal = _mm256_cvtps_pd(fVal);
230 _mm256_store_pd(qBufferPtr, dVal);
/* Scalar tail: handle the num_points % 4 points the vector loop left over. */
236 number = quarterPoints * 4;
237 for (; number < num_points; number++) {
/* float -> double conversion happens implicitly on assignment. */
238 *iBufferPtr++ = *complexVectorPtr++;
239 *qBufferPtr++ = *complexVectorPtr++;
245 #include <emmintrin.h>
250 unsigned int num_points) {
/*
 * Aligned counterpart of the 128-bit deinterleave kernel: splits interleaved
 * float pairs into two double streams (first float of a pair -> iBufferPtr,
 * second -> qBufferPtr), 2 points per vector iteration.
 *
 * _mm_load_ps and _mm_store_pd require 16-byte aligned addresses, so the
 * input and both output buffers must be 16-byte aligned.
 *
 * NOTE(review): the start of the signature, the declaration of dVal, any
 * advance of iBufferPtr/qBufferPtr inside the vector loop and the closing
 * braces are not among the visible lines -- confirm against the complete
 * file.
 */
251 unsigned int number = 0;
/* Treat the complex input as a flat float array (two floats per point). */
253 const float *complexVectorPtr = (
float *)complexVector;
254 double *iBufferPtr = iBuffer;
255 double *qBufferPtr = qBuffer;
/* Number of full vector iterations; each one consumes 2 points (4 floats). */
257 const unsigned int halfPoints = num_points / 2;
258 __m128 cplxValue, fVal;
261 for (; number < halfPoints; number++) {
/* Aligned load of 4 floats = 2 interleaved pairs. */
263 cplxValue = _mm_load_ps(complexVectorPtr);
264 complexVectorPtr += 4;
/*
 * Move elements 0 and 2 (first float of each pair) into the low two lanes;
 * _mm_cvtps_pd only converts the low two floats, so the duplicated upper
 * lanes are ignored.
 */
267 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
268 dVal = _mm_cvtps_pd(fVal);
269 _mm_store_pd(iBufferPtr, dVal);
/* Same for elements 1 and 3 (second float of each pair). */
272 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
273 dVal = _mm_cvtps_pd(fVal);
274 _mm_store_pd(qBufferPtr, dVal);
/* Scalar tail: handle the single leftover point when num_points is odd. */
280 number = halfPoints * 2;
281 for (; number < num_points; number++) {
/* float -> double conversion happens implicitly on assignment. */
282 *iBufferPtr++ = *complexVectorPtr++;
283 *qBufferPtr++ = *complexVectorPtr++;
288 #ifdef LV_HAVE_GENERIC
293 unsigned int num_points) {
/*
 * Portable scalar version: for each of num_points points, reads two
 * consecutive floats from the input and writes the first, widened to
 * double, to iBuffer and the second to qBuffer. Only plain scalar accesses
 * are used in the visible lines.
 *
 * NOTE(review): the start of the signature and the closing braces are not
 * among the visible lines.
 */
294 unsigned int number = 0;
/* Treat the complex input as a flat float array (two floats per point). */
295 const float *complexVectorPtr = (
float *)complexVector;
296 double *iBufferPtr = iBuffer;
297 double *qBufferPtr = qBuffer;
/* One point per iteration; the input pointer advances by two floats. */
299 for (number = 0; number < num_points; number++) {
300 *iBufferPtr++ = (double)*complexVectorPtr++;
301 *qBufferPtr++ = (double)*complexVectorPtr++;
306 #ifdef LV_HAVE_NEONV8
307 #include <arm_neon.h>
310 volk_32fc_deinterleave_64f_x2_neon(
double *iBuffer,
double *qBuffer,
312 unsigned int num_points) {
/*
 * NEON (guarded by LV_HAVE_NEONV8) version: splits interleaved float pairs
 * into two double streams, writing the first float of every pair to iBuffer
 * and the second to qBuffer, 2 points per vector iteration.
 *
 * NOTE(review): the return type, the complexVector parameter declaration,
 * any advance of iBufferPtr/qBufferPtr inside the vector loop and the
 * closing braces are not among the visible lines -- confirm against the
 * complete file.
 */
313 unsigned int number = 0;
/* Number of full vector iterations; each one consumes 2 points (4 floats). */
314 unsigned int half_points = num_points / 2;
/* Treat the complex input as a flat float array (two floats per point). */
315 const float *complexVectorPtr = (
float *)complexVector;
316 double *iBufferPtr = iBuffer;
317 double *qBufferPtr = qBuffer;
318 float32x2x2_t complexInput;
319 float64x2_t iVal, qVal;
321 for (number = 0; number < half_points; number++) {
/*
 * De-interleaving load of 4 floats: val[0] receives elements 0 and 2,
 * val[1] receives elements 1 and 3.
 */
322 complexInput = vld2_f32(complexVectorPtr);
/* Widen each pair of floats to a pair of doubles. */
324 iVal = vcvt_f64_f32(complexInput.val[0]);
325 qVal = vcvt_f64_f32(complexInput.val[1]);
/* Store two doubles to each output stream. */
327 vst1q_f64(iBufferPtr, iVal);
328 vst1q_f64(qBufferPtr, qVal);
330 complexVectorPtr += 4;
/* Scalar tail: handle the single leftover point when num_points is odd. */
335 for (number = half_points * 2; number < num_points; number++) {
336 *iBufferPtr++ = (double)*complexVectorPtr++;
337 *qBufferPtr++ = (double)*complexVectorPtr++;