70 #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
71 #define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
77 #include <immintrin.h>
79 static inline void volk_32fc_deinterleave_real_32f_a_avx2(
float* iBuffer,
81 unsigned int num_points)
83 unsigned int number = 0;
84 const unsigned int eighthPoints = num_points / 8;
86 const float* complexVectorPtr = (
const float*)complexVector;
87 float* iBufferPtr = iBuffer;
89 __m256 cplxValue1, cplxValue2;
91 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
92 for (; number < eighthPoints; number++) {
94 cplxValue1 = _mm256_load_ps(complexVectorPtr);
95 complexVectorPtr += 8;
97 cplxValue2 = _mm256_load_ps(complexVectorPtr);
98 complexVectorPtr += 8;
101 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
102 iValue = _mm256_permutevar8x32_ps(iValue, idx);
104 _mm256_store_ps(iBufferPtr, iValue);
109 number = eighthPoints * 8;
110 for (; number < num_points; number++) {
111 *iBufferPtr++ = *complexVectorPtr++;
118 #include <xmmintrin.h>
/*
 * SSE kernel body: copies every second float of the interleaved input
 * (starting with the first) into iBuffer, four outputs per vector iteration
 * followed by a scalar tail.
 *
 * NOTE(review): the line(s) carrying the function name and the parameters
 * that precede num_points are not visible here; iBuffer and complexVector are
 * used below without a visible declaration. Restore them from upstream.
 */
122 unsigned int num_points)
124 unsigned int number = 0;
/* Number of full 4-output vector iterations. */
125 const unsigned int quarterPoints = num_points / 4;
/* Treat the complex input as a flat array of floats. */
127 const float* complexVectorPtr = (
const float*)complexVector;
128 float* iBufferPtr = iBuffer;
130 __m128 cplxValue1, cplxValue2, iValue;
131 for (; number < quarterPoints; number++) {
/* Two _mm_load_ps calls read 8 consecutive floats; _mm_load_ps requires a
 * 16-byte aligned address. */
133 cplxValue1 = _mm_load_ps(complexVectorPtr);
134 complexVectorPtr += 4;
136 cplxValue2 = _mm_load_ps(complexVectorPtr);
137 complexVectorPtr += 4;
/* Selects floats 0 and 2 of cplxValue1, then floats 0 and 2 of cplxValue2,
 * i.e. the even-indexed floats of the 8 just loaded. */
140 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
/* Aligned store of the 4 selected floats.
 * NOTE(review): no `iBufferPtr += 4` is visible after this store; as shown,
 * every iteration would write to the same address. Confirm whether a line
 * was lost. */
142 _mm_store_ps(iBufferPtr, iValue);
/* Scalar tail for the num_points % 4 remaining outputs. */
147 number = quarterPoints * 4;
148 for (; number < num_points; number++) {
/* NOTE(review): as shown, the input pointer advances by one float per
 * output, so consecutive floats would be copied rather than every second
 * one. Confirm whether an extra `complexVectorPtr++` was lost. */
149 *iBufferPtr++ = *complexVectorPtr++;
156 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel body: walks the interleaved input as a flat float
 * array and writes one float to iBuffer per loop iteration.
 *
 * NOTE(review): the line(s) carrying the function name and the parameters
 * that precede num_points are not visible here; iBuffer and complexVector are
 * used below without a visible declaration. Restore them from upstream.
 */
160 unsigned int num_points)
162 unsigned int number = 0;
/* NOTE(review): the cast is to plain `float*` although the result is stored
 * in a `const float*`; the sibling SIMD kernels cast to `const float*`. */
163 const float* complexVectorPtr = (
float*)complexVector;
164 float* iBufferPtr = iBuffer;
165 for (number = 0; number < num_points; number++) {
/* NOTE(review): as shown, the input pointer advances by one float per
 * output, so consecutive floats would be copied rather than every second
 * one. Confirm whether an extra `complexVectorPtr++` was lost. */
166 *iBufferPtr++ = *complexVectorPtr++;
174 #include <arm_neon.h>
/*
 * NEON kernel body: uses a de-interleaving load to split the input into
 * even- and odd-indexed floats and stores the even-indexed ones to iBuffer,
 * four outputs per vector iteration followed by a scalar tail.
 *
 * NOTE(review): the line(s) carrying the function name and the parameters
 * that precede num_points are not visible here; iBuffer and complexVector are
 * used below without a visible declaration. Restore them from upstream.
 */
178 unsigned int num_points)
180 unsigned int number = 0;
/* Number of full 4-output vector iterations. */
181 unsigned int quarter_points = num_points / 4;
/* Treat the complex input as a flat array of floats. */
182 const float* complexVectorPtr = (
float*)complexVector;
183 float* iBufferPtr = iBuffer;
184 float32x4x2_t complexInput;
186 for (number = 0; number < quarter_points; number++) {
/* vld2q_f32 reads 8 floats and de-interleaves them: val[0] receives the
 * even-indexed floats, val[1] the odd-indexed ones. */
187 complexInput = vld2q_f32(complexVectorPtr);
/* Store only val[0]; val[1] is never used.
 * NOTE(review): no `iBufferPtr += 4` is visible after this store; as shown,
 * every iteration would write to the same address. Confirm whether a line
 * was lost. */
188 vst1q_f32(iBufferPtr, complexInput.val[0]);
189 complexVectorPtr += 8;
/* Scalar tail for the num_points % 4 remaining outputs. */
193 for (number = quarter_points * 4; number < num_points; number++) {
/* NOTE(review): as shown, the input pointer advances by one float per
 * output, so consecutive floats would be copied rather than every second
 * one. Confirm whether an extra `complexVectorPtr++` was lost. */
194 *iBufferPtr++ = *complexVectorPtr++;
203 #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
204 #define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
206 #include <inttypes.h>
210 #include <immintrin.h>
/*
 * AVX2 kernel using unaligned loads and stores: copies every second float of
 * the interleaved input (starting with the first) into iBuffer, eight outputs
 * per vector iteration followed by a scalar tail.
 *
 * NOTE(review): the parameter between iBuffer and num_points is not visible;
 * complexVector is used below without a visible declaration. The function's
 * braces and end are also not visible in this view.
 */
212 static inline void volk_32fc_deinterleave_real_32f_u_avx2(
float* iBuffer,
214 unsigned int num_points)
216 unsigned int number = 0;
/* Number of full 8-output vector iterations. */
217 const unsigned int eighthPoints = num_points / 8;
/* Treat the complex input as a flat array of floats. */
219 const float* complexVectorPtr = (
const float*)complexVector;
220 float* iBufferPtr = iBuffer;
/* NOTE(review): iValue is assigned below but no declaration is visible. */
222 __m256 cplxValue1, cplxValue2;
/* Permutation indices, listed high element first: element order becomes
 * 0 1 4 5 2 3 6 7, which swaps the two middle pairs left out of order by the
 * per-lane shuffle below. */
224 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
225 for (; number < eighthPoints; number++) {
/* Two unaligned loads read 16 consecutive floats. */
227 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
228 complexVectorPtr += 8;
230 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
231 complexVectorPtr += 8;
/* Within each 128-bit lane, keep floats 0 and 2 of cplxValue1 followed by
 * floats 0 and 2 of cplxValue2, then restore sequential order with idx. */
234 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
235 iValue = _mm256_permutevar8x32_ps(iValue, idx);
/* Unaligned store of the 8 selected floats.
 * NOTE(review): no `iBufferPtr += 8` is visible after this store; as shown,
 * every iteration would write to the same address. Confirm whether a line
 * was lost. */
237 _mm256_storeu_ps(iBufferPtr, iValue);
/* Scalar tail for the num_points % 8 remaining outputs. */
242 number = eighthPoints * 8;
243 for (; number < num_points; number++) {
/* NOTE(review): as shown, the input pointer advances by one float per
 * output, so consecutive floats would be copied rather than every second
 * one. Confirm whether an extra `complexVectorPtr++` follows outside this
 * view. */
244 *iBufferPtr++ = *complexVectorPtr++;