// MSVC targeting x86/x64/ARM/ARM64: guard for compiler-specific support.
// NOTE(review): the body of this #if (and its matching #endif) is elided from
// this chunk -- presumably intrinsics setup; confirm against the full source.
#if defined(_MSC_VER) && \
    (defined(_M_AMD64) || defined(_M_IX86) || defined(_M_ARM64) || defined(_M_ARM))
// Library version: 1.4.1 (major.minor.patch).
#define DISPENSO_MAJOR_VERSION 1
#define DISPENSO_MINOR_VERSION 4
#define DISPENSO_PATCH_VERSION 1
// Detect C++20 concepts: require both the language level and the feature-test
// macro value so partial implementations are excluded.
#if __cplusplus >= 202002L && defined(__cpp_concepts) && __cpp_concepts >= 201907L
#define DISPENSO_HAS_CONCEPTS 1
// NOTE(review): the matching #else/#endif lines are elided from this chunk.
#define DISPENSO_HAS_CONCEPTS 0
// DISPENSO_REQUIRES(...) expands to a requires-clause when concepts are
// available, and to nothing otherwise (templates degrade to unconstrained).
#if DISPENSO_HAS_CONCEPTS
#define DISPENSO_REQUIRES(...) requires(__VA_ARGS__)
// NOTE(review): #else/#endif elided from this chunk.
#define DISPENSO_REQUIRES(...)
// Shared-library symbol visibility: export/import decoration on MSVC,
// default visibility on GCC/Clang, nothing for static builds.
#if defined(DISPENSO_SHARED_LIB)
// NOTE(review): an #if defined(_MSC_VER) line appears to be elided here.
#if defined(DISPENSO_LIB_EXPORT)
#define DISPENSO_DLL_ACCESS __declspec(dllexport)
// (elided #else: consumers of the DLL import the symbols)
#define DISPENSO_DLL_ACCESS __declspec(dllimport)
// (elided #endif)
#elif defined(__clang__) || defined(__GNUC__)
#define DISPENSO_DLL_ACCESS __attribute__((visibility("default")))
// (elided #endif lines)
// Fallback: no decoration when nothing above applied (static builds).
#if !defined(DISPENSO_DLL_ACCESS)
#define DISPENSO_DLL_ACCESS
80using ssize_t = std::make_signed<std::size_t>::type;
// Force-inline annotation per compiler; plain `inline` as the fallback.
#if defined(__clang__) || defined(__GNUC__)
#define DISPENSO_INLINE __attribute__((always_inline)) inline
#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)
#define DISPENSO_INLINE __forceinline
// (elided #else)
#define DISPENSO_INLINE inline
// Cache-line-size selection for Apple arm64; the definitions under this #if
// (and the rest of that section) are elided from this chunk.
#if defined(__APPLE__) && defined(__arm64__)
// Lightweight thread-local storage macro. NOTE(review): the preceding
// #if defined(_MSC_VER) line appears to be elided from this chunk.
#define DISPENSO_THREAD_LOCAL __declspec(thread)
#elif defined(__GNUC__) || defined(__clang__)
#define DISPENSO_THREAD_LOCAL __thread
// (elided #else: unsupported compilers fail loudly rather than silently
// getting the heavier C++11 thread_local semantics)
#error Supply lightweight thread-locals for this compiler. Can define to thread_local if lightweight not available
// Branch-prediction hint: DISPENSO_EXPECT(expr, expected) maps to
// __builtin_expect on GCC/Clang and is the bare expression elsewhere.
#if (defined(__GNUC__) || defined(__clang__))
#define DISPENSO_EXPECT(a, b) __builtin_expect(a, b)
// (elided #else)
#define DISPENSO_EXPECT(a, b) a
// Portable warning-suppression macros: push/pop plus named suppressions.
#if (defined(__GNUC__) || defined(__clang__))
// Stringize helper so pragma text can be built from a macro argument.
#define DO_PRAGMA(X) _Pragma(#X)
#define DISPENSO_DISABLE_WARNING_PUSH DO_PRAGMA(GCC diagnostic push)
#define DISPENSO_DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop)
#define DISPENSO_DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName)
// These two warnings are Clang-specific, so they are no-ops on plain GCC.
#if !defined(__clang__)
#define DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS
#define DISPENSO_DISABLE_WARNING_GLOBAL_CONSTRUCTORS
// (elided #else: Clang branch)
#define DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS \
  DISPENSO_DISABLE_WARNING(-Wgnu-zero-variadic-macro-arguments)
#define DISPENSO_DISABLE_WARNING_GLOBAL_CONSTRUCTORS \
  DISPENSO_DISABLE_WARNING(-Wglobal-constructors)
// (elided #endif)
#elif defined(_MSC_VER)
#define DISPENSO_DISABLE_WARNING_PUSH __pragma(warning(push))
#define DISPENSO_DISABLE_WARNING_POP __pragma(warning(pop))
#define DISPENSO_DISABLE_WARNING(warningNumber) __pragma(warning(disable : warningNumber))
// MSVC has no equivalent of these Clang warnings; define as no-ops.
#define DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS
#define DISPENSO_DISABLE_WARNING_GLOBAL_CONSTRUCTORS
// (elided #else: unknown compiler, everything is a no-op)
#define DISPENSO_DISABLE_WARNING_PUSH
#define DISPENSO_DISABLE_WARNING_POP
#define DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS
#define DISPENSO_DISABLE_WARNING_GLOBAL_CONSTRUCTORS
// Fragments of an aligned-buffer helper (the enclosing struct's header and
// this operator's body are elided from this chunk).
// Const conversion to the stored T. NOTE(review): presumably reinterprets
// the raw storage `b` below as a T -- confirm against the full source.
operator const T&() const {
// Raw storage with T's size and alignment; the object is evidently
// constructed in-place elsewhere (see placement new in make_shared below).
alignas(alignof(T)) char b[sizeof(T)];
// Atomic pointer padded out to a full cache line to avoid false sharing
// between adjacent atomics. NOTE(review): the preceding template <typename T>
// line is elided from this chunk; kCacheLineSize is defined earlier in the
// full file.
struct alignas(kCacheLineSize) AlignedAtomic : public std::atomic<T*> {};
// Allocates `bytes` bytes aligned to `alignment` (clamped up to at least
// sizeof(uintptr_t)). Over-allocates by `alignment` so an aligned address can
// be carved out of the raw block, and stashes the original malloc pointer in
// the word immediately preceding the returned address so alignedFree() can
// recover it. Returns nullptr if the underlying allocation fails.
inline void* alignedMalloc(size_t bytes, size_t alignment) {
  // Need at least one uintptr_t of slack to hold the recovery pointer.
  alignment = std::max(alignment, sizeof(uintptr_t));
  char* ptr = reinterpret_cast<char*>(::malloc(bytes + alignment));
  if (!ptr) {
    return nullptr; // propagate allocation failure instead of faulting below
  }
  uintptr_t base = reinterpret_cast<uintptr_t>(ptr);
  uintptr_t oldBase = base;
  uintptr_t mask = alignment - 1;
  // Advance past the recovery word, then round down to the alignment
  // boundary. malloc's own alignment guarantee (a multiple of
  // sizeof(uintptr_t)) ensures the recovery word stays inside the block.
  base = (base + alignment) & ~mask;
  uintptr_t* recovery = reinterpret_cast<uintptr_t*>(base - sizeof(uintptr_t));
  *recovery = oldBase; // remember the raw pointer for alignedFree()
  return reinterpret_cast<void*>(base);
}
// Convenience overload: allocate with cache-line alignment.
// kCacheLineSize is defined earlier in the full file (elided from this chunk).
inline void* alignedMalloc(size_t bytes) {
  return alignedMalloc(bytes, kCacheLineSize);
// Releases memory obtained from alignedMalloc(). The original malloc pointer
// is recovered from the uintptr_t stored immediately before `ptr` and handed
// back to ::free. Accepts nullptr as a no-op, mirroring free()'s contract.
inline void alignedFree(void* ptr) {
  if (!ptr) {
    return; // match ::free(nullptr) semantics; avoid reading before null
  }
  char* p = reinterpret_cast<char*>(ptr);
  uintptr_t recovered = *reinterpret_cast<uintptr_t*>(p - sizeof(uintptr_t));
  ::free(reinterpret_cast<void*>(recovered));
}
212struct AlignedFreeDeleter {
213 void operator()(T* ptr) {
215 detail::alignedFree(ptr);
// Specialization for void: there is no object to destroy, only storage to
// release. NOTE(review): the preceding template <> line is elided from this
// chunk.
struct AlignedFreeDeleter<void> {
  void operator()(void* ptr) {
    detail::alignedFree(ptr);
225template <
typename T,
class... Args>
226std::shared_ptr<T> make_shared(Args&&... args) {
227 void* tv = alignedMalloc(
sizeof(T),
alignof(T));
228 T* t =
new (tv) T(std::forward<Args>(args)...);
229 return std::shared_ptr<T>(t, AlignedFreeDeleter<T>());
232inline constexpr uintptr_t alignToCacheLine(uintptr_t val) {
// cpuRelax(): hint that we are in a spin-wait loop, reducing power draw and
// contention (x86 `pause`, ARM `yield`, POWER priority nops). NOTE(review):
// several closing braces/#endif lines and the MSVC intrinsic bodies are
// elided from this chunk.
#if defined __x86_64__ || defined __i386__
inline void cpuRelax() {
  // "memory" clobber keeps the compiler from caching values across the wait.
  asm volatile("pause" ::: "memory");
#elif defined _MSC_VER && (defined _M_AMD64 || defined _M_IX86)
inline void cpuRelax() {
  // (MSVC intrinsic body elided from this chunk)
#elif defined __arm64__ || defined __aarch64__
inline void cpuRelax() {
  asm volatile("yield" ::: "memory");
#elif defined _MSC_VER && (defined _M_ARM64 || defined _M_ARM)
inline void cpuRelax() {
  // (MSVC intrinsic body elided from this chunk)
#elif defined __powerpc__ || defined __POWERPC__
// NOTE(review): a nested condition line distinguishing the two POWER
// variants below appears to be elided here.
inline void cpuRelax() {
  // Register-prefixed mnemonic form: lowers SMT thread priority.
  asm volatile("or r27,r27,r27" ::: "memory");
inline void cpuRelax() {
  // Same priority nop in the unprefixed (ELF assembler) form.
  asm volatile("or 27,27,27" ::: "memory");
// (elided #else) Unknown architecture: relax is a no-op.
inline void cpuRelax() {}
// Describes a static partition of work items into chunks: tasks with index
// < transitionTaskIndex receive ceilChunkSize items, and the remaining tasks
// receive ceilChunkSize - 1 items, so all items are covered with per-task
// sizes differing by at most one. NOTE(review): the struct's opening line is
// elided from this chunk and is reconstructed from the members and the usage
// below.
struct StaticChunking {
  ssize_t transitionTaskIndex;
  ssize_t ceilChunkSize;
};

// Computes the static chunking of `items` items across `chunks` chunks.
// Preconditions: chunks > 0 (divisor below), items >= 0.
// As visible in this chunk, the local `chunking` was never declared and the
// result never returned (lines elided); both are restored here.
inline StaticChunking staticChunkSize(ssize_t items, ssize_t chunks) {
  StaticChunking chunking;
  // Ceiling division: the larger of the two per-chunk sizes.
  chunking.ceilChunkSize = (items + chunks - 1) / chunks;
  // How many chunks can be one item smaller while still covering all items.
  ssize_t numLeft = chunking.ceilChunkSize * chunks - items;
  chunking.transitionTaskIndex = chunks - numLeft;
  return chunking;
}
detail::AlignedAtomic<T> AlignedAtomic
    Cache-line aligned atomic pointer.
detail::AlignedBuffer<T> AlignedBuffer
    Buffer with proper alignment for type T.
detail::StaticChunking StaticChunking
    Information for statically chunking a range across threads.