Pico GPS Teseo I2C
Loading...
Searching...
No Matches
neon.h
Go to the documentation of this file.
/******************************************************************************
 *
 * Copyright 2022 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/
18
19#if __ARM_NEON && __ARM_ARCH_ISA_A64
20
21#include <arm_neon.h>
22
23#else
24
25#include <stdint.h>
26
27
/* ----------------------------------------------------------------------------
 * Integer
 * -------------------------------------------------------------------------- */
31
/* Four lanes of signed 16-bit integers, held in `e[0]` .. `e[3]` */
typedef struct {
    int16_t e[4];
} int16x4_t;

/* Eight lanes of signed 16-bit integers, held in `e[0]` .. `e[7]` */
typedef struct {
    int16_t e[8];
} int16x8_t;

/* Four lanes of signed 32-bit integers, held in `e[0]` .. `e[3]` */
typedef struct {
    int32_t e[4];
} int32x4_t;

/* Two lanes of signed 64-bit integers, held in `e[0]` and `e[1]` */
typedef struct {
    int64_t e[2];
} int64x2_t;
37
38
42
44static int16x4_t vld1_s16(const int16_t *p)
45{
46 return (int16x4_t){ { p[0], p[1], p[2], p[3] } };
47}
48
49
53
55static int32x4_t vmull_s16(int16x4_t a, int16x4_t b)
56{
57 return (int32x4_t){ { a.e[0] * b.e[0], a.e[1] * b.e[1],
58 a.e[2] * b.e[2], a.e[3] * b.e[3] } };
59}
60
63{
64 return (int32x4_t){ {
65 r.e[0] + a.e[0] * b.e[0], r.e[1] + a.e[1] * b.e[1],
66 r.e[2] + a.e[2] * b.e[2], r.e[3] + a.e[3] * b.e[3] } };
67}
68
70static int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b)
71{
73
74 r.e[0] = a.e[0] + ((int64_t)b.e[0] + b.e[1]);
75 r.e[1] = a.e[1] + ((int64_t)b.e[2] + b.e[3]);
76
77 return r;
78}
79
80
84
86static int32_t vaddvq_s32(int32x4_t v)
87{
88 return v.e[0] + v.e[1] + v.e[2] + v.e[3];
89}
90
92static int64_t vaddvq_s64(int64x2_t v)
93{
94 return v.e[0] + v.e[1];
95}
96
97
101
103static int16x4_t vext_s16(int16x4_t a, int16x4_t b, const int n)
104{
105 int16_t x[] = { a.e[0], a.e[1], a.e[2], a.e[3],
106 b.e[0], b.e[1], b.e[2], b.e[3] };
107
108 return (int16x4_t){ { x[n], x[n+1], x[n+2], x[n+3] } };
109}
110
112static int32x4_t vmovq_n_s32(uint32_t v)
113{
114 return (int32x4_t){ { v, v, v, v } };
115}
116
118static int64x2_t vmovq_n_s64(int64_t v)
119{
120 return (int64x2_t){ { v, v, } };
121}
122
123
124
/* ----------------------------------------------------------------------------
 * Floating Point
 * -------------------------------------------------------------------------- */
128
/* Two lanes of single precision floats, held in `e[0]` and `e[1]` */
typedef struct {
    float e[2];
} float32x2_t;

/* Four lanes of single precision floats, held in `e[0]` .. `e[3]` */
typedef struct {
    float e[4];
} float32x4_t;

/* Pair of 2-lane float vectors, `val[0]` and `val[1]` */
typedef struct {
    float32x2_t val[2];
} float32x2x2_t;

/* Pair of 4-lane float vectors, `val[0]` and `val[1]` */
typedef struct {
    float32x4_t val[2];
} float32x4x2_t;
134
135
139
141static float32x2_t vld1_f32(const float *p)
142{
143 return (float32x2_t){ { p[0], p[1] } };
144}
145
147static float32x4_t vld1q_f32(const float *p)
148{
149 return (float32x4_t){ { p[0], p[1], p[2], p[3] } };
150}
151
153static float32x4_t vld1q_dup_f32(const float *p)
154{
155 return (float32x4_t){ { p[0], p[0], p[0], p[0] } };
156}
157
159static float32x2x2_t vld2_f32(const float *p)
160{
161 return (float32x2x2_t){ .val[0] = { { p[0], p[2] } },
162 .val[1] = { { p[1], p[3] } } };
163}
164
166static float32x4x2_t vld2q_f32(const float *p)
167{
168 return (float32x4x2_t){ .val[0] = { { p[0], p[2], p[4], p[6] } },
169 .val[1] = { { p[1], p[3], p[5], p[7] } } };
170}
171
173static void vst1_f32(float *p, float32x2_t v)
174{
175 p[0] = v.e[0], p[1] = v.e[1];
176}
177
179static void vst1q_f32(float *p, float32x4_t v)
180{
181 p[0] = v.e[0], p[1] = v.e[1], p[2] = v.e[2], p[3] = v.e[3];
182}
183
187
189static float32x2_t vneg_f32(float32x2_t a)
190{
191 return (float32x2_t){ { -a.e[0], -a.e[1] } };
192}
193
195static float32x4_t vnegq_f32(float32x4_t a)
196{
197 return (float32x4_t){ { -a.e[0], -a.e[1], -a.e[2], -a.e[3] } };
198}
199
201static float32x4_t vaddq_f32(float32x4_t a, float32x4_t b)
202{
203 return (float32x4_t){ { a.e[0] + b.e[0], a.e[1] + b.e[1],
204 a.e[2] + b.e[2], a.e[3] + b.e[3] } };
205}
206
208static float32x4_t vsubq_f32(float32x4_t a, float32x4_t b)
209{
210 return (float32x4_t){ { a.e[0] - b.e[0], a.e[1] - b.e[1],
211 a.e[2] - b.e[2], a.e[3] - b.e[3] } };
212}
213
216{
217 return (float32x2_t){ {
218 a.e[0] + b.e[0] * c.e[0], a.e[1] + b.e[1] * c.e[1] } };
219}
220
223{
224 return (float32x4_t){ {
225 a.e[0] + b.e[0] * c.e[0], a.e[1] + b.e[1] * c.e[1],
226 a.e[2] + b.e[2] * c.e[2], a.e[3] + b.e[3] * c.e[3] } };
227}
228
231{
232 return (float32x2_t){ {
233 a.e[0] - b.e[0] * c.e[0], a.e[1] - b.e[1] * c.e[1] } };
234}
235
238{
239 return (float32x4_t){ {
240 a.e[0] - b.e[0] * c.e[0], a.e[1] - b.e[1] * c.e[1],
241 a.e[2] - b.e[2] * c.e[2], a.e[3] - b.e[3] * c.e[3] } };
242}
243
244
248
250static float32x2_t vcreate_f32(uint64_t u)
251{
252 float *f = (float *)&u;
253 return (float32x2_t){ { f[0] , f[1] } };
254}
255
257static float32x4_t vcombine_f32(float32x2_t a, float32x2_t b)
258{
259 return (float32x4_t){ { a.e[0], a.e[1], b.e[0], b.e[1] } };
260}
261
263static float32x2_t vget_low_f32(float32x4_t a)
264{
265 return (float32x2_t){ { a.e[0], a.e[1] } };
266}
267
269static float32x2_t vget_high_f32(float32x4_t a)
270{
271 return (float32x2_t){ { a.e[2], a.e[3] } };
272}
273
275static float32x4_t vmovq_n_f32(float v)
276{
277 return (float32x4_t){ { v, v, v, v } };
278}
279
281static float32x2_t vrev64_f32(float32x2_t v)
282{
283 return (float32x2_t){ { v.e[1], v.e[0] } };
284}
285
287static float32x4_t vrev64q_f32(float32x4_t v)
288{
289 return (float32x4_t){ { v.e[1], v.e[0], v.e[3], v.e[2] } };
290}
291
293static float32x2_t vtrn1_f32(float32x2_t a, float32x2_t b)
294{
295 return (float32x2_t){ { a.e[0], b.e[0] } };
296}
297
299static float32x2_t vtrn2_f32(float32x2_t a, float32x2_t b)
300{
301 return (float32x2_t){ { a.e[1], b.e[1] } };
302}
303
305static float32x4_t vtrn1q_f32(float32x4_t a, float32x4_t b)
306{
307 return (float32x4_t){ { a.e[0], b.e[0], a.e[2], b.e[2] } };
308}
309
311static float32x4_t vtrn2q_f32(float32x4_t a, float32x4_t b)
312{
313 return (float32x4_t){ { a.e[1], b.e[1], a.e[3], b.e[3] } };
314}
315
317static float32x4_t vzip1q_f32(float32x4_t a, float32x4_t b)
318{
319 return (float32x4_t){ { a.e[0], b.e[0], a.e[1], b.e[1] } };
320}
321
323static float32x4_t vzip2q_f32(float32x4_t a, float32x4_t b)
324{
325 return (float32x4_t){ { a.e[2], b.e[2], a.e[3], b.e[3] } };
326}
327
328
329#endif /* __ARM_NEON */
void uint32_t
Definition bootrom.h:36
int void * v
Definition ctypes.h:115
int int n
Definition ctypes.h:90
#define unused(v)
Marking v as a unused parameter or value.
Definition compiler.h:102
void __attribute__((noreturn))(*rom_reset_usb_boot_fn)(uint32_t
Reboot the device into BOOTSEL mode.
Definition bootrom.h:36
float32x2_t float32x2_t c
Definition neon.h:216
int16x4_t a
Definition neon.h:62
return r
Definition neon.h:77
int16x2_t b
Definition simd32.h:38
int16x2_t int32_t u
Definition simd32.h:47
float x
Definition sqrt_test.cpp:13
float e[2]
Definition neon.h:129
float32x2_t val[2]
Definition neon.h:132
float e[4]
Definition neon.h:130
float32x4_t val[2]
Definition neon.h:133
int16_t e[4]
Definition neon.h:32
int16_t e[8]
Definition neon.h:34
int32_t e[4]
Definition neon.h:35
int64_t e[2]
Definition neon.h:36