Grok 10.0.3
skeleton-inl.h
Go to the documentation of this file.
1// Copyright 2020 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Demo of functions that might be called from multiple SIMD modules (either
17// other -inl.h files, or a .cc file between begin/end_target-inl). This is
18// optional - all SIMD code can reside in .cc files. However, this allows
19// splitting code into different files while still inlining instead of requiring
20// calling through function pointers.
21
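22// Per-target include guard: unlike a normal guard, its state is toggled on
22// each inclusion and compared against HWY_TARGET_TOGGLE, so the contents are
22// still compiled once per target.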
23#if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE)
24#ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
25#undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
26#else
27#define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
28#endif
29
30// It is fine to #include normal or *-inl headers.
31#include <stddef.h>
32
33#include "hwy/highway.h"
34
36namespace skeleton {
37namespace HWY_NAMESPACE {
38
39using namespace hwy::HWY_NAMESPACE;
40
41// Example of a type-agnostic (caller-specified lane type) and width-agnostic
42// (uses best available instruction set) function in a header.
43//
44// Computes x[i] = mul_array[i] * x_array[i] + add_array[i] for i < size.
45template <class D, typename T>
46HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array,
47 const T* HWY_RESTRICT add_array,
48 const size_t size, T* HWY_RESTRICT x_array) {
49 for (size_t i = 0; i < size; i += Lanes(d)) {
50 const auto mul = Load(d, mul_array + i);
51 const auto add = Load(d, add_array + i);
52 auto x = Load(d, x_array + i);
53 x = MulAdd(mul, x, add);
54 Store(x, d, x_array + i);
55 }
56}
57
58// NOLINTNEXTLINE(google-readability-namespace-comments)
59} // namespace HWY_NAMESPACE
60} // namespace skeleton
62
63#endif // include guard
#define HWY_RESTRICT
Definition: base.h:61
#define HWY_MAYBE_UNUSED
Definition: base.h:73
Definition: copy-inl.h:31
d
Definition: rvv-inl.h:1742
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1784
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:236
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2706
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2882
HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T *HWY_RESTRICT mul_array, const T *HWY_RESTRICT add_array, const size_t size, T *HWY_RESTRICT x_array)
Definition: skeleton-inl.h:46
Definition: skeleton-inl.h:36
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()