73 #ifndef INCLUDED_volk_32fc_x2_add_32fc_u_H
74 #define INCLUDED_volk_32fc_x2_add_32fc_u_H
77 #include <immintrin.h>
/* Element-wise sum of two lv_32fc_t vectors, AVX path with unaligned
 * loads and stores: a 256-bit vector loop followed by a scalar tail.
 * bVector is one of the inputs; num_points is the number of points to add.
 * NOTE(review): aPtr/bPtr/cPtr are declared outside the visible lines --
 * presumably they walk the two inputs and the output; confirm. */
81 const lv_32fc_t* bVector,
unsigned int num_points)
83 unsigned int number = 0;
/* Vector-loop trip count: four points are accounted for per pass. */
84 const unsigned int quarterPoints = num_points / 4;
90 __m256 aVal, bVal, cVal;
91 for(;number < quarterPoints; number++){
/* Each load fetches eight floats; the loadu form has no alignment
 * requirement. Assumes lv_32fc_t is an interleaved float pair, so eight
 * floats == four points -- TODO confirm. */
93 aVal = _mm256_loadu_ps((
float *) aPtr);
94 bVal = _mm256_loadu_ps((
float *) bPtr);
/* Lane-wise float addition. */
96 cVal = _mm256_add_ps(aVal, bVal);
98 _mm256_storeu_ps((
float *) cPtr,cVal);
/* NOTE(review): nothing visible advances aPtr/bPtr/cPtr inside the vector
 * loop; confirm the increments exist in the full source. */
/* Scalar tail: resume at the first point the vector loop did not cover and
 * add the remaining (num_points % 4) points one at a time. */
105 number = quarterPoints * 4;
106 for(;number < num_points; number++){
107 *cPtr++ = (*aPtr++) + (*bPtr++);
114 #include <immintrin.h>
/* Element-wise sum of two lv_32fc_t vectors, AVX path with aligned loads
 * and stores: a 256-bit vector loop followed by a scalar tail.
 * bVector is one of the inputs; num_points is the number of points to add.
 * NOTE(review): aPtr/bPtr/cPtr are declared outside the visible lines --
 * presumably they walk the two inputs and the output; confirm. */
118 const lv_32fc_t* bVector,
unsigned int num_points)
120 unsigned int number = 0;
/* Vector-loop trip count: four points are accounted for per pass. */
121 const unsigned int quarterPoints = num_points / 4;
127 __m256 aVal, bVal, cVal;
128 for(;number < quarterPoints; number++){
/* _mm256_load_ps / _mm256_store_ps require 32-byte-aligned addresses, so
 * all three buffers must be aligned accordingly by the caller. */
130 aVal = _mm256_load_ps((
float*) aPtr);
131 bVal = _mm256_load_ps((
float*) bPtr);
/* Lane-wise float addition of the eight floats in each register. */
133 cVal = _mm256_add_ps(aVal, bVal);
135 _mm256_store_ps((
float*) cPtr,cVal);
/* NOTE(review): nothing visible advances aPtr/bPtr/cPtr inside the vector
 * loop; confirm the increments exist in the full source. */
/* Scalar tail: resume at the first point the vector loop did not cover and
 * add the remaining (num_points % 4) points one at a time. */
142 number = quarterPoints * 4;
143 for(;number < num_points; number++){
144 *cPtr++ = (*aPtr++) + (*bPtr++);
151 #include <xmmintrin.h>
/* Element-wise sum of two lv_32fc_t vectors, SSE path with unaligned loads
 * and stores: a 128-bit vector loop followed by a scalar tail.
 * bVector is one of the inputs; num_points is the number of points to add.
 * NOTE(review): aPtr/bPtr/cPtr are declared outside the visible lines --
 * presumably they walk the two inputs and the output; confirm. */
155 const lv_32fc_t* bVector,
unsigned int num_points)
157 unsigned int number = 0;
/* Vector-loop trip count: two points are accounted for per pass. */
158 const unsigned int halfPoints = num_points / 2;
164 __m128 aVal, bVal, cVal;
165 for(;number < halfPoints; number++){
/* Each load fetches four floats; the loadu form has no alignment
 * requirement. Assumes lv_32fc_t is an interleaved float pair, so four
 * floats == two points -- TODO confirm. */
167 aVal = _mm_loadu_ps((
float *) aPtr);
168 bVal = _mm_loadu_ps((
float *) bPtr);
/* Lane-wise float addition. */
170 cVal = _mm_add_ps(aVal, bVal);
172 _mm_storeu_ps((
float*) cPtr, cVal);
/* NOTE(review): nothing visible advances aPtr/bPtr/cPtr inside the vector
 * loop; confirm the increments exist in the full source. */
/* Scalar tail: handles the single leftover point when num_points is odd. */
179 number = halfPoints * 2;
180 for(;number < num_points; number++){
181 *cPtr++ = (*aPtr++) + (*bPtr++);
187 #ifdef LV_HAVE_GENERIC
/* Portable scalar path: adds the two input vectors one point at a time.
 * bVector is one of the inputs; num_points is the number of points to add.
 * NOTE(review): aPtr/bPtr/cPtr are declared outside the visible lines --
 * presumably they walk the two inputs and the output; confirm. */
191 const lv_32fc_t* bVector,
unsigned int num_points)
196 unsigned int number = 0;
/* The loop re-initialises number to 0; redundant with the declaration
 * above but harmless. */
198 for(number = 0; number < num_points; number++){
/* Write one sum and post-increment all three pointers. */
199 *cPtr++ = (*aPtr++) + (*bPtr++);
206 #include <xmmintrin.h>
/* SSE path with aligned loads and stores: a 128-bit vector loop followed by
 * a scalar tail.
 * NOTE(review): the signature and the aPtr/bPtr/cPtr declarations are not
 * among the visible lines; num_points is presumably the point count
 * parameter -- confirm. */
211 unsigned int number = 0;
/* Vector-loop trip count: two points are accounted for per pass. */
212 const unsigned int halfPoints = num_points / 2;
218 __m128 aVal, bVal, cVal;
219 for(;number < halfPoints; number++){
/* _mm_load_ps / _mm_store_ps require 16-byte-aligned addresses, so all
 * three buffers must be aligned accordingly by the caller. */
220 aVal = _mm_load_ps((
float *) aPtr);
221 bVal = _mm_load_ps((
float *) bPtr);
/* Lane-wise float addition of the four floats in each register. */
223 cVal = _mm_add_ps(aVal, bVal);
225 _mm_store_ps((
float *) cPtr,cVal);
/* NOTE(review): nothing visible advances aPtr/bPtr/cPtr inside the vector
 * loop; confirm the increments exist in the full source. */
/* Scalar tail: handles the single leftover point when num_points is odd. */
232 number = halfPoints * 2;
233 for(;number < num_points; number++){
234 *cPtr++ = (*aPtr++) + (*bPtr++);
241 #include <arm_neon.h>
/* Element-wise sum of two lv_32fc_t vectors, ARM NEON path: a 128-bit
 * vector loop followed by a scalar tail.
 * bVector is one of the inputs; num_points is the number of points to add.
 * NOTE(review): aPtr/bPtr/cPtr are declared outside the visible lines --
 * presumably they walk the two inputs and the output; confirm. */
245 const lv_32fc_t* bVector,
unsigned int num_points)
247 unsigned int number = 0;
/* Vector-loop trip count: two points are accounted for per pass. */
248 const unsigned int halfPoints = num_points / 2;
253 float32x4_t aVal, bVal, cVal;
/* number is reset to 0 here as well; redundant with its declaration. */
254 for(number=0; number < halfPoints; number++){
/* vld1q_f32 loads four consecutive floats into a q register. Assumes
 * lv_32fc_t is an interleaved float pair, so four floats == two points
 * -- TODO confirm. */
256 aVal = vld1q_f32((
const float32_t*)(aPtr));
257 bVal = vld1q_f32((
const float32_t*)(bPtr));
/* Lane-wise float addition. */
262 cVal = vaddq_f32(aVal, bVal);
264 vst1q_f32((
float*)(cPtr),cVal);
/* NOTE(review): nothing visible advances aPtr/bPtr/cPtr inside the vector
 * loop; confirm the increments exist in the full source. */
/* Scalar tail: handles the single leftover point when num_points is odd. */
271 number = halfPoints * 2;
272 for(;number < num_points; number++){
273 *cPtr++ = (*aPtr++) + (*bPtr++);