39 #ifndef INCLUDED_VOLK_32u_REVERSE_32u_U_H
/* Table contents: 256 byte values. The value at position i (counting from 0
 * in listing order) is i with its eight bits in reversed order, e.g. position
 * 1 holds 0x80 and position 3 holds 0xC0. The declaration these initializers
 * belong to is not part of this excerpt. */
88 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30,
89 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98,
90 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64,
91 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC,
92 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02,
93 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2,
94 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A,
95 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
96 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E,
97 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81,
98 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71,
99 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
100 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15,
101 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD,
102 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43,
103 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
104 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B,
105 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97,
106 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F,
107 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
/* Variant compiled only when LV_HAVE_GENERIC is defined.
 * Runs num_points iterations; each iteration copies the 32 members b00..b31
 * of *in_ptr into *out_ptr in mirrored order (out b00 <- in b31, out b01 <-
 * in b30, ..., out b31 <- in b00).
 * The function name, the declarations of in_ptr/out_ptr and the pointer
 * advance are not part of this excerpt.
 * NOTE(review): the b00..b31 members are presumably 1-bit bitfields of a
 * 32-bit struct -- confirm against the struct definition. */
109 #ifdef LV_HAVE_GENERIC
111 unsigned int num_points)
115 unsigned int number = 0;
116 for(; number < num_points; ++number){
/* Mirror the members: output index k takes input index 31-k. */
117 out_ptr->
b00 = in_ptr->
b31;
118 out_ptr->
b01 = in_ptr->
b30;
119 out_ptr->
b02 = in_ptr->
b29;
120 out_ptr->
b03 = in_ptr->
b28;
121 out_ptr->
b04 = in_ptr->
b27;
122 out_ptr->
b05 = in_ptr->
b26;
123 out_ptr->
b06 = in_ptr->
b25;
124 out_ptr->
b07 = in_ptr->
b24;
125 out_ptr->
b08 = in_ptr->
b23;
126 out_ptr->
b09 = in_ptr->
b22;
127 out_ptr->
b10 = in_ptr->
b21;
128 out_ptr->
b11 = in_ptr->
b20;
129 out_ptr->
b12 = in_ptr->
b19;
130 out_ptr->
b13 = in_ptr->
b18;
131 out_ptr->
b14 = in_ptr->
b17;
132 out_ptr->
b15 = in_ptr->
b16;
133 out_ptr->
b16 = in_ptr->
b15;
134 out_ptr->
b17 = in_ptr->
b14;
135 out_ptr->
b18 = in_ptr->
b13;
136 out_ptr->
b19 = in_ptr->
b12;
137 out_ptr->
b20 = in_ptr->
b11;
138 out_ptr->
b21 = in_ptr->
b10;
139 out_ptr->
b22 = in_ptr->
b09;
140 out_ptr->
b23 = in_ptr->
b08;
141 out_ptr->
b24 = in_ptr->
b07;
142 out_ptr->
b25 = in_ptr->
b06;
143 out_ptr->
b26 = in_ptr->
b05;
144 out_ptr->
b27 = in_ptr->
b04;
145 out_ptr->
b28 = in_ptr->
b03;
146 out_ptr->
b29 = in_ptr->
b02;
147 out_ptr->
b30 = in_ptr->
b01;
148 out_ptr->
b31 = in_ptr->
b00;
/* Another variant guarded by LV_HAVE_GENERIC.
 * Sets up a read cursor over `in` and a write cursor over `out`, then loops
 * num_points times. The function name and the loop body are not part of this
 * excerpt, so the per-element operation cannot be described from here. */
155 #ifdef LV_HAVE_GENERIC
157 unsigned int num_points)
159 const uint32_t *in_ptr = in;
160 uint32_t *out_ptr = out;
161 unsigned int number = 0;
162 for(; number < num_points; ++number){
/* Another variant guarded by LV_HAVE_GENERIC.
 * Sets up a read cursor over `in` and a write cursor over `out`, then loops
 * num_points times. The function name and the loop body are not part of this
 * excerpt, so the per-element operation cannot be described from here. */
209 #ifdef LV_HAVE_GENERIC
211 unsigned int num_points)
213 const uint32_t *in_ptr = in;
214 uint32_t *out_ptr = out;
215 unsigned int number = 0;
216 for(; number < num_points; ++number){
/* Variant guarded by LV_HAVE_GENERIC that processes each 32-bit word as four
 * bytes. Per iteration, output byte 3-k is computed from input byte k
 * (k = 0..3), so byte order is mirrored while each byte goes through a
 * multiply / mask / multiply / shift sequence in 64-bit unsigned arithmetic.
 * That sequence matches the published 64-bit-multiply method for reversing
 * the bits of one byte.
 * The function name, the declarations of in8/out8 and the pointer advance
 * are not part of this excerpt. */
230 #ifdef LV_HAVE_GENERIC
232 unsigned int num_points)
234 const uint32_t *in_ptr = in;
235 uint32_t *out_ptr = out;
238 unsigned int number = 0;
239 for(; number < num_points; ++number){
/* View the current input and output words as byte arrays. */
240 in8 = (
const uint8_t*)in_ptr;
241 out8 = (uint8_t*)out_ptr;
/* `*` binds tighter than `>>`, so the shift by 32 applies to the whole
 * product. NOTE(review): out8 is presumably uint8_t*, so only the low 8 bits
 * of each result are stored -- confirm against the declaration. */
242 out8[3] = ((in8[0] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
243 out8[2] = ((in8[1] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
244 out8[1] = ((in8[2] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
245 out8[0] = ((in8[3] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
/* Variant guarded by LV_HAVE_GENERIC that processes each 32-bit word as four
 * bytes. Per iteration, output byte 3-k is computed from input byte k
 * (k = 0..3), so byte order is mirrored while each byte goes through a
 * multiply / mask / modulus-1023 sequence in 64-bit unsigned arithmetic.
 * That sequence matches the published multiply-and-modulus method for
 * reversing the bits of one byte.
 * The function name, the declarations of in8/out8 and the pointer advance
 * are not part of this excerpt. */
252 #ifdef LV_HAVE_GENERIC
255 unsigned int num_points)
257 const uint32_t *in_ptr = in;
258 uint32_t *out_ptr = out;
261 unsigned int number = 0;
262 for(; number < num_points; ++number){
/* View the current input and output words as byte arrays. */
263 in8 = (
const uint8_t*)in_ptr;
264 out8 = (uint8_t*)out_ptr;
/* `*` binds tighter than `&`, so the mask applies to the product; the
 * remainder modulo 1023 is then stored in the mirrored byte position. */
265 out8[3] = (in8[0] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
266 out8[2] = (in8[1] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
267 out8[1] = (in8[2] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
268 out8[0] = (in8[3] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
/* Variant guarded by LV_HAVE_GENERIC that reverses the bit order of each
 * 32-bit word with five swap stages, from the widest groups to the
 * narrowest: 16-bit halves, bytes, nibbles, bit pairs, single bits.
 * The function name, the store of tmp to the output and the pointer advance
 * are not part of this excerpt. */
276 #ifdef LV_HAVE_GENERIC
278 unsigned int num_points)
280 const uint32_t *in_ptr = in;
281 uint32_t *out_ptr = out;
282 unsigned int number = 0;
283 for(; number < num_points; ++number){
284 uint32_t tmp = *in_ptr;
/* Exchange the upper and lower 16-bit halves. */
288 tmp = ( tmp << 16 ) | ( tmp >> 16 );
/* Exchange the two bytes inside each half; the mask evaluates to 0x00FF00FF. */
292 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
/* Exchange the two nibbles inside each byte; the mask evaluates to 0x0F0F0F0F. */
296 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) | ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
/* Exchange the two bit pairs inside each nibble. */
301 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
/* Exchange the two bits inside each pair. */
305 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
/* Variant guarded by LV_HAVE_GENERIC that reverses the bit order of each
 * 32-bit word with five swap stages, from the narrowest groups to the
 * widest: single bits, bit pairs, nibbles, bytes, 16-bit halves.
 * The function name, the store of tmp to the output and the pointer advance
 * are not part of this excerpt. */
313 #ifdef LV_HAVE_GENERIC
315 unsigned int num_points)
318 const uint32_t *in_ptr = in;
319 uint32_t *out_ptr = out;
320 unsigned int number = 0;
321 for(; number < num_points; ++number){
322 uint32_t tmp = *in_ptr;
/* Exchange the two bits inside each pair. */
323 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
/* Exchange the two bit pairs inside each nibble. */
324 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
/* Exchange the two nibbles inside each byte; the mask evaluates to 0x0F0F0F0F. */
325 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) | ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
/* Exchange the two bytes inside each 16-bit half; the mask evaluates to 0x00FF00FF. */
326 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
/* Exchange the upper and lower 16-bit halves. */
327 tmp = ( tmp << 16 ) | ( tmp >> 16 );
/* NEON variant, compiled only when LV_HAVE_NEONV8 is defined.
 * out:        destination buffer of 32-bit words.
 * in:         source buffer of 32-bit words.
 * num_points: number of 32-bit words to process.
 * The main loop handles four words per iteration (num_points/4 iterations);
 * a second loop then covers the remaining num_points % 4 words. The body of
 * that second loop, the pointer advances and the closing braces are not part
 * of this excerpt. */
336 #ifdef LV_HAVE_NEONV8
337 #include <arm_neon.h>
339 static inline void volk_32u_reverse_32u_neonv8(uint32_t* out,
const uint32_t* in,
340 unsigned int num_points)
342 const uint32_t *in_ptr = in;
343 uint32_t *out_ptr = out;
/* Byte index pattern that mirrors the byte order inside each group of four
 * bytes (3,2,1,0 / 7,6,5,4 / ...). */
345 const uint8x16_t idx = { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 };
347 const unsigned int quarterPoints = num_points/4;
348 unsigned int number = 0;
349 for(; number < quarterPoints; ++number){
/* Load four 32-bit words. */
351 uint32x4_t x = vld1q_u32(in_ptr);
/* Reverse the bits within every byte (vrbitq_u8), then run a byte table
 * lookup (vqtbl1q_u8). NOTE(review): the lookup's index argument is on a
 * line not shown here -- presumably `idx`; confirm. */
352 uint32x4_t z = vreinterpretq_u32_u8(vqtbl1q_u8(vrbitq_u8(vreinterpretq_u8_u32 (x)),
354 vst1q_u32 (out_ptr, z);
/* Resume the element counter at the first word the vector loop did not cover. */
358 number = quarterPoints*4;
359 for(; number < num_points; ++number){
/* ARM variant built around the `rbit` instruction.
 * The inline-assembly fragment below issues `rbit` with *in_ptr as the input
 * register operand and *out_ptr as the output register operand; its lines end
 * in backslashes, so it is presumably the body of a multi-line macro whose
 * header is not part of this excerpt -- confirm.
 * The function fragment that follows loops num_points/8 times and then covers
 * the remaining num_points % 8 elements in a second loop. Both loop bodies
 * and the function name are not part of this excerpt.
 * NOTE(review): the first loop presumably processes eight elements per
 * iteration -- confirm against the missing body. */
372 #include <arm_neon.h>
375 __VOLK_ASM("rbit %[result], %[value]" \
376 : [result]"=r" (*out_ptr) \
377 : [value] "r" (*in_ptr) \
383 unsigned int num_points)
386 const uint32_t *in_ptr = in;
387 uint32_t *out_ptr = out;
/* Number of iterations of the first loop: num_points divided by eight. */
388 const unsigned int eighthPoints = num_points/8;
389 unsigned int number = 0;
390 for(; number < eighthPoints; ++number){
/* Resume the element counter at the first element the first loop did not cover. */
395 number = eighthPoints*8;
396 for(; number < num_points; ++number){