71 #ifndef INCLUDED_volk_64f_x2_multiply_64f_H
72 #define INCLUDED_volk_64f_x2_multiply_64f_H
77 #ifdef LV_HAVE_GENERIC
/* Scalar element-wise multiply of two double arrays: for each of the
 * num_points elements, the product of the aVector and bVector entries is
 * written through the output cursor.
 *
 * aVector, bVector: read-only input arrays (const double*).
 * num_points:       number of elements to process.
 *
 * NOTE(review): the line that opens this signature (return type, function
 * name and the declaration of cVector) and the surrounding braces are not
 * present in this excerpt -- confirm against the full header. */
80 const double* aVector,
81 const double* bVector,
82 unsigned int num_points)
/* Working cursors initialised from the argument pointers. */
84 double* cPtr = cVector;
85 const double* aPtr = aVector;
86 const double* bPtr = bVector;
87 unsigned int number = 0;
/* One product per iteration; all three cursors advance by one element. */
89 for (number = 0; number < num_points; number++) {
90 *cPtr++ = (*aPtr++) * (*bPtr++);
102 #include <emmintrin.h>
/* 128-bit SIMD element-wise multiply of two double arrays using unaligned
 * loads and stores (_mm_loadu_pd / _mm_storeu_pd). The vector loop runs
 * num_points / 2 times on two-double registers; a scalar loop then handles
 * the remaining element when num_points is odd.
 *
 * aVector, bVector: read-only input arrays (const double*).
 * num_points:       number of elements to process.
 *
 * NOTE(review): the line that opens this signature (return type, function
 * name and the declaration of cVector) and the braces are not present in
 * this excerpt -- confirm against the full header. */
105 const double* aVector,
106 const double* bVector,
107 unsigned int num_points)
109 unsigned int number = 0;
/* Number of full two-element vector iterations (integer division). */
110 const unsigned int half_points = num_points / 2;
/* Working cursors initialised from the argument pointers. */
112 double* cPtr = cVector;
113 const double* aPtr = aVector;
114 const double* bPtr = bVector;
116 __m128d aVal, bVal, cVal;
/* Vector loop: load two doubles from each input, multiply lane-wise, store.
 * NOTE(review): no statement advancing aPtr/bPtr/cPtr is visible inside this
 * loop; presumably those lines were dropped from this excerpt -- verify that
 * each cursor moves forward by 2 per iteration in the real file. */
117 for (; number < half_points; number++) {
118 aVal = _mm_loadu_pd(aPtr);
119 bVal = _mm_loadu_pd(bPtr);
121 cVal = _mm_mul_pd(aVal, bVal);
123 _mm_storeu_pd(cPtr, cVal);
/* Resume the index at the first element not covered by the vector loop. */
130 number = half_points * 2;
/* Scalar tail: at most one leftover element (num_points % 2). */
131 for (; number < num_points; number++) {
132 *cPtr++ = (*aPtr++) * (*bPtr++);
141 #include <immintrin.h>
/* 256-bit SIMD element-wise multiply of two double arrays using unaligned
 * loads and stores (_mm256_loadu_pd / _mm256_storeu_pd). The vector loop
 * runs num_points / 4 times on four-double registers; a scalar loop then
 * handles the remaining num_points % 4 elements.
 *
 * aVector, bVector: read-only input arrays (const double*).
 * num_points:       number of elements to process.
 *
 * NOTE(review): the line that opens this signature (return type, function
 * name and the declaration of cVector) and the braces are not present in
 * this excerpt -- confirm against the full header. */
144 const double* aVector,
145 const double* bVector,
146 unsigned int num_points)
148 unsigned int number = 0;
/* Number of full four-element vector iterations (integer division). */
149 const unsigned int quarter_points = num_points / 4;
/* Working cursors initialised from the argument pointers. */
151 double* cPtr = cVector;
152 const double* aPtr = aVector;
153 const double* bPtr = bVector;
155 __m256d aVal, bVal, cVal;
/* Vector loop: load four doubles from each input, multiply lane-wise, store.
 * NOTE(review): no statement advancing aPtr/bPtr/cPtr is visible inside this
 * loop; presumably those lines were dropped from this excerpt -- verify that
 * each cursor moves forward by 4 per iteration in the real file. */
156 for (; number < quarter_points; number++) {
158 aVal = _mm256_loadu_pd(aPtr);
159 bVal = _mm256_loadu_pd(bPtr);
161 cVal = _mm256_mul_pd(aVal, bVal);
163 _mm256_storeu_pd(cPtr, cVal);
/* Resume the index at the first element not covered by the vector loop. */
170 number = quarter_points * 4;
/* Scalar tail: up to three leftover elements (num_points % 4). */
171 for (; number < num_points; number++) {
172 *cPtr++ = (*aPtr++) * (*bPtr++);
184 #include <emmintrin.h>
/* 128-bit SIMD element-wise multiply of two double arrays using the aligned
 * load/store intrinsics (_mm_load_pd / _mm_store_pd), which require the
 * addresses they are given to be 16-byte aligned. The vector loop runs
 * num_points / 2 times on two-double registers; a scalar loop then handles
 * the remaining element when num_points is odd.
 *
 * aVector, bVector: read-only input arrays (const double*); passed directly
 *                   to the aligned loads, so they must satisfy the alignment
 *                   requirement.
 * num_points:       number of elements to process.
 *
 * NOTE(review): the line that opens this signature (return type, function
 * name and the declaration of cVector) and the braces are not present in
 * this excerpt -- confirm against the full header. */
187 const double* aVector,
188 const double* bVector,
189 unsigned int num_points)
191 unsigned int number = 0;
/* Number of full two-element vector iterations (integer division). */
192 const unsigned int half_points = num_points / 2;
/* Working cursors initialised from the argument pointers. */
194 double* cPtr = cVector;
195 const double* aPtr = aVector;
196 const double* bPtr = bVector;
198 __m128d aVal, bVal, cVal;
/* Vector loop: aligned load of two doubles from each input, lane-wise
 * multiply, aligned store.
 * NOTE(review): no statement advancing aPtr/bPtr/cPtr is visible inside this
 * loop; presumably those lines were dropped from this excerpt -- verify that
 * each cursor moves forward by 2 per iteration in the real file. */
199 for (; number < half_points; number++) {
200 aVal = _mm_load_pd(aPtr);
201 bVal = _mm_load_pd(bPtr);
203 cVal = _mm_mul_pd(aVal, bVal);
205 _mm_store_pd(cPtr, cVal);
/* Resume the index at the first element not covered by the vector loop. */
212 number = half_points * 2;
/* Scalar tail: at most one leftover element (num_points % 2). */
213 for (; number < num_points; number++) {
214 *cPtr++ = (*aPtr++) * (*bPtr++);
223 #include <immintrin.h>
/* 256-bit SIMD element-wise multiply of two double arrays using the aligned
 * load/store intrinsics (_mm256_load_pd / _mm256_store_pd), which require
 * the addresses they are given to be 32-byte aligned. The vector loop runs
 * num_points / 4 times on four-double registers; a scalar loop then handles
 * the remaining num_points % 4 elements.
 *
 * aVector, bVector: read-only input arrays (const double*); passed directly
 *                   to the aligned loads, so they must satisfy the alignment
 *                   requirement.
 * num_points:       number of elements to process.
 *
 * NOTE(review): the line that opens this signature (return type, function
 * name and the declaration of cVector) and the braces are not present in
 * this excerpt -- confirm against the full header. */
226 const double* aVector,
227 const double* bVector,
228 unsigned int num_points)
230 unsigned int number = 0;
/* Number of full four-element vector iterations (integer division). */
231 const unsigned int quarter_points = num_points / 4;
/* Working cursors initialised from the argument pointers. */
233 double* cPtr = cVector;
234 const double* aPtr = aVector;
235 const double* bPtr = bVector;
237 __m256d aVal, bVal, cVal;
/* Vector loop: aligned load of four doubles from each input, lane-wise
 * multiply, aligned store.
 * NOTE(review): no statement advancing aPtr/bPtr/cPtr is visible inside this
 * loop; presumably those lines were dropped from this excerpt -- verify that
 * each cursor moves forward by 4 per iteration in the real file. */
238 for (; number < quarter_points; number++) {
240 aVal = _mm256_load_pd(aPtr);
241 bVal = _mm256_load_pd(bPtr);
243 cVal = _mm256_mul_pd(aVal, bVal);
245 _mm256_store_pd(cPtr, cVal);
/* Resume the index at the first element not covered by the vector loop. */
252 number = quarter_points * 4;
/* Scalar tail: up to three leftover elements (num_points % 4). */
253 for (; number < num_points; number++) {
254 *cPtr++ = (*aPtr++) * (*bPtr++);