dispenso 1.6.0
A library for task parallelism
Loading...
Searching...
No Matches
parallel_for.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
14#pragma once
15
16#include <cmath>
17#include <limits>
18
19#include <dispenso/cpu_set.h>
21#include <dispenso/task_set.h>
22#include "detail/can_invoke.h"
23#include "detail/par_for_stripe.h"
24#include "detail/per_thread_info.h"
25
26namespace dispenso {
27
#if DISPENSO_HAS_CONCEPTS
/**
 * Satisfied when <code>F</code> is invocable with two <code>IntegerT</code> arguments.  The
 * stateless ChunkedRange overloads of parallel_for use it to constrain their functor.
 **/
template <typename F, typename IntegerT>
concept ParallelForRangeFunc = std::invocable<F, IntegerT, IntegerT>;

/**
 * Satisfied when <code>F</code> is invocable with a single <code>IntegerT</code> argument.
 **/
template <typename F, typename IntegerT>
concept ParallelForIndexFunc = std::invocable<F, IntegerT>;

/**
 * Satisfied when <code>F</code> is invocable with a <code>StateRef</code> followed by two
 * <code>IntegerT</code> arguments.
 **/
template <typename F, typename StateRef, typename IntegerT>
concept ParallelForStateRangeFunc = std::invocable<F, StateRef, IntegerT, IntegerT>;

/**
 * Satisfied when <code>F</code> is invocable with a <code>StateRef</code> followed by a single
 * <code>IntegerT</code> argument.
 **/
template <typename F, typename StateRef, typename IntegerT>
concept ParallelForStateIndexFunc = std::invocable<F, StateRef, IntegerT>;
#endif // DISPENSO_HAS_CONCEPTS
65
83enum class ParForChunking {
84 kStatic,
85 kAdaptive,
86 kAuto DISPENSO_DEPRECATED("Use ParForChunking::kAdaptive (kAuto will be removed in 2.0).") =
87 kAdaptive,
88};
89
99 uint32_t maxThreads = std::numeric_limits<int32_t>::max();
108 bool wait = true;
109
114 ParForChunking defaultChunking = ParForChunking::kStatic;
115
129 uint32_t minItemsPerChunk = 1;
130
148 uint32_t granularity = 1;
149
155 bool reuseExistingState = false;
156};
157
/**
 * A half-open index range [start, end) together with a chunking policy.  <code>chunk</code> is
 * either an explicit chunk size, 0 for automatic chunking, or <code>kStatic</code> for static
 * chunking.
 **/
template <typename IntegerT = ssize_t>
struct ChunkedRange {
  // We need to utilize 64-bit integers to avoid overflow, e.g. passing -2**30, 2**30 as int32 will
  // result in overflow unless we cast to 64-bit.  Note that if we have a range of e.g. -2**63+1 to
  // 2**63-1, we cannot hold the result in an int64_t.  We could in a uint64_t, but it is quite
  // tricky to make this work.  However, I do not expect ranges larger than can be held in int64_t
  // since people want their computations to finish before the heat death of the sun (slight
  // exaggeration).
  using size_type = std::conditional_t<std::is_signed<IntegerT>::value, int64_t, uint64_t>;

  // Tag types selecting the chunking policy at construction.
  struct Static {};
  struct Auto {};
  // Sentinel chunk value that marks a statically chunked range.
  static constexpr IntegerT kStatic = std::numeric_limits<IntegerT>::max();

  /** Create a range [s, e) with explicit chunk size c (0 == auto, kStatic == static). **/
  ChunkedRange(IntegerT s, IntegerT e, IntegerT c) : start(s), end(e), chunk(c) {}
  /** Create a statically chunked range [s, e). **/
  ChunkedRange(IntegerT s, IntegerT e, Static) : ChunkedRange(s, e, kStatic) {}
  /** Create an automatically chunked range [s, e). **/
  ChunkedRange(IntegerT s, IntegerT e, Auto) : ChunkedRange(s, e, 0) {}

  bool isStatic() const {
    return chunk == kStatic;
  }

  bool isAuto() const {
    return chunk == 0;
  }

  bool empty() const {
    return end <= start;
  }

  // Number of items, computed in the 64-bit size_type to avoid overflow of IntegerT.
  size_type size() const {
    return static_cast<size_type>(end) - start;
  }

  /**
   * Compute the chunk size and the number of chunks for this range.
   *
   * @param numLaunched Number of tasks launched onto pool threads.
   * @param oneOnCaller Whether the calling thread also does work (adds one working thread).
   * @param minChunkSize Smallest acceptable chunk size for automatic chunking.
   * @param granularity Automatic chunk sizes are rounded up to a multiple of this when > 1.
   * @param maxDynFactor Upper bound on the number of chunks per working thread that automatic
   * chunking starts from.
   * @return <code>{chunkSize, numChunks}</code>.
   *
   * @note Aborts for statically chunked ranges; the static parallel_for path must be used for
   * those.  For automatic chunking an empty range yields zero chunks, and if
   * <code>minChunkSize</code> cannot be honored with one chunk per working thread, fewer and
   * larger chunks are returned instead of dividing by zero.
   **/
  template <typename OtherInt>
  std::tuple<size_type, size_type> calcChunkSize(
      OtherInt numLaunched,
      bool oneOnCaller,
      size_type minChunkSize,
      uint32_t granularity = 1,
      size_type maxDynFactor = 16) const {
    size_type workingThreads = static_cast<size_type>(numLaunched) + size_type{oneOnCaller};
    assert(workingThreads > 0);

    if (!chunk) {
      if (empty()) {
        // Nothing to distribute; report a valid non-zero chunk size and zero chunks.
        return {std::max<size_type>(1, granularity), size_type{0}};
      }
      const size_type total = size();

      // Start with several chunks per thread, but never fewer than one chunk per thread:
      // a factor of zero would make roughChunks zero and divide by zero below.
      size_type dynFactor =
          std::max<size_type>(1, std::min<size_type>(maxDynFactor, total / workingThreads));
      size_type chunkSize;
      do {
        const size_type roughChunks = dynFactor * workingThreads;
        chunkSize = (total + roughChunks - 1) / roughChunks;
        if (granularity > 1) {
          // Round UP to a multiple of granularity (no smaller than granularity).
          chunkSize = ((chunkSize + granularity - 1) / granularity) * granularity;
        }
        --dynFactor;
      } while (chunkSize < minChunkSize && dynFactor > 0);

      if (chunkSize < minChunkSize) {
        // Even one chunk per thread is too small: use fewer chunks of the minimum size, capped
        // at the whole range.
        chunkSize = std::min<size_type>(minChunkSize, total);
        if (granularity > 1) {
          chunkSize = ((chunkSize + granularity - 1) / granularity) * granularity;
        }
      }
      return {chunkSize, (total + chunkSize - 1) / chunkSize};
    } else if (chunk == kStatic) {
      // This should never be called.  The static distribution versions of the parallel_for
      // functions should be invoked instead.
      std::abort();
    }
    return {chunk, (size() + chunk - 1) / chunk};
  }

  IntegerT start;
  IntegerT end;
  IntegerT chunk;
};
257
265template <typename IntegerA, typename IntegerB>
266inline ChunkedRange<std::common_type_t<IntegerA, IntegerB>>
267makeChunkedRange(IntegerA start, IntegerB end, ParForChunking chunking = ParForChunking::kStatic) {
268 using IntegerT = std::common_type_t<IntegerA, IntegerB>;
269 return (chunking == ParForChunking::kStatic)
270 ? ChunkedRange<IntegerT>(start, end, typename ChunkedRange<IntegerT>::Static())
271 : ChunkedRange<IntegerT>(start, end, typename ChunkedRange<IntegerT>::Auto());
272}
273
281template <typename IntegerA, typename IntegerB, typename IntegerC>
282inline ChunkedRange<std::common_type_t<IntegerA, IntegerB>>
283makeChunkedRange(IntegerA start, IntegerB end, IntegerC chunkSize) {
284 return ChunkedRange<std::common_type_t<IntegerA, IntegerB>>(start, end, chunkSize);
285}
286
287namespace detail {
288
289struct NoOpIter {
290 using difference_type = std::ptrdiff_t;
291 using value_type = int;
292 using pointer = int*;
293 using reference = int&;
294 using iterator_category = std::random_access_iterator_tag;
295
296 int& operator*() const {
297 static DISPENSO_THREAD_LOCAL int dummy = 0;
298 return dummy;
299 }
300 NoOpIter& operator++() {
301 return *this;
302 }
303 NoOpIter operator++(int) {
304 return *this;
305 }
306 NoOpIter& operator--() {
307 return *this;
308 }
309 NoOpIter operator--(int) {
310 return *this;
311 }
312 NoOpIter& operator+=(difference_type) {
313 return *this;
314 }
315 NoOpIter& operator-=(difference_type) {
316 return *this;
317 }
318 NoOpIter operator+(difference_type) const {
319 return *this;
320 }
321 NoOpIter operator-(difference_type) const {
322 return *this;
323 }
324 difference_type operator-(const NoOpIter&) const {
325 return 0;
326 }
327 bool operator==(const NoOpIter&) const {
328 return true;
329 }
330 bool operator!=(const NoOpIter&) const {
331 return false;
332 }
333 bool operator<(const NoOpIter&) const {
334 return false;
335 }
336 int& operator[](difference_type) const {
337 static DISPENSO_THREAD_LOCAL int dummy = 0;
338 return dummy;
339 }
340};
341
342struct NoOpContainer {
343 size_t size() const {
344 return 0;
345 }
346
347 bool empty() const {
348 return true;
349 }
350
351 void clear() {}
352
353 NoOpIter begin() {
354 return {};
355 }
356
357 void emplace_back(int) {}
358
359 int& front() {
360 static int i;
361 return i;
362 }
363};
364
// State generator used by the stateless parallel_for overloads; always produces 0.
struct NoOpStateGen {
  int operator()() const {
    return 0;
  }
};
370
// Round size DOWN to a multiple of granularity.  A granularity of 0 or 1 returns size unchanged.
// The arithmetic is done on the unsigned counterpart of IntegerT.
template <typename IntegerT>
inline IntegerT roundDownToGranularity(IntegerT size, uint32_t granularity) {
  if (granularity <= 1) {
    return size;
  }
  using U = typename std::make_unsigned<IntegerT>::type;
  const U unsignedSize = static_cast<U>(size);
  const auto wholeBlocks = unsignedSize / granularity;
  return static_cast<IntegerT>(wholeBlocks * granularity);
}
380
// Round size UP to a multiple of granularity.  A granularity of 0 or 1 returns size unchanged.
// The arithmetic is done on the unsigned counterpart of IntegerT.
template <typename IntegerT>
inline IntegerT roundUpToGranularity(IntegerT size, uint32_t granularity) {
  if (granularity <= 1) {
    return size;
  }
  using U = typename std::make_unsigned<IntegerT>::type;
  const U unsignedSize = static_cast<U>(size);
  // Adding granularity - 1 before the division turns the truncating division into a ceiling.
  const auto blocks = (unsignedSize + granularity - 1) / granularity;
  return static_cast<IntegerT>(blocks * granularity);
}
391
// Initialize states container with enough entries for the given thread count.
// Respects reuseExistingState: when true, only adds entries if the container
// doesn't already have enough; when false, the container is cleared first.
template <typename StateContainer, typename StateGen>
void initStates(
    StateContainer& states,
    const StateGen& defaultState,
    size_t numNeeded,
    bool reuseExistingState) {
  if (!reuseExistingState) {
    states.clear();
  }
  // Count from the initial size instead of re-querying size() each iteration: NoOpContainer
  // always reports size 0, so a loop conditioned on size() would never terminate for it.
  for (size_t numPresent = states.size(); numPresent < numNeeded; ++numPresent) {
    states.emplace_back(defaultState());
  }
}
408
409template <typename IntegerT>
410struct ChunkSizingResult {
411 typename ChunkedRange<IntegerT>::size_type maxThreads;
412 bool isStatic;
413};
414
415// Adjust the thread count and static/dynamic scheduling decision based on work size.
416template <typename IntegerT>
417ChunkSizingResult<IntegerT> adjustChunkSizing(
418 const ChunkedRange<IntegerT>& range,
419 typename ChunkedRange<IntegerT>::size_type maxThreads,
420 bool isStatic,
421 uint32_t minItemsPerChunk,
422 typename ChunkedRange<IntegerT>::size_type poolThreads,
423 bool wait) {
424 using size_type = typename ChunkedRange<IntegerT>::size_type;
425
426 maxThreads = std::min<size_type>(maxThreads, poolThreads + 1);
427
428 if (minItemsPerChunk > 1) {
429 size_type maxWorkers = range.size() / minItemsPerChunk;
430 if (maxWorkers < maxThreads) {
431 maxThreads = maxWorkers;
432 }
433 if (maxThreads > 0 && range.size() / (maxThreads + wait) < minItemsPerChunk && range.isAuto()) {
434 isStatic = true;
435 }
436 } else if (range.size() <= poolThreads + wait) {
437 if (range.isAuto()) {
438 isStatic = true;
439 } else if (!range.isStatic()) {
440 maxThreads = range.size() - wait;
441 }
442 }
443
444 return {maxThreads, isStatic};
445}
446
// Effective granularity and trimmed range end, as produced by computeGranularity().
template <typename IntegerT>
struct GranularityInfo {
  // Granularity that is actually applied.
  uint32_t granularity;
  // End of the portion of the range whose size is a multiple of granularity.
  IntegerT trimmedEnd;
  // True when items remain between trimmedEnd and the original end.
  bool hasTail;
};
454
455template <typename IntegerT>
456GranularityInfo<IntegerT> computeGranularity(
457 const ChunkedRange<IntegerT>& range,
458 uint32_t requested) {
459 using size_type = typename ChunkedRange<IntegerT>::size_type;
460 uint32_t granularity = (range.chunk == 0 || range.chunk == ChunkedRange<IntegerT>::kStatic)
461 ? std::max<uint32_t>(1, requested)
462 : 1;
463
464 IntegerT trimmedEnd = range.end;
465 bool hasTail = false;
466 if (granularity > 1) {
467 size_type rem = range.size() % granularity;
468 if (rem > 0) {
469 trimmedEnd = static_cast<IntegerT>(range.end - static_cast<IntegerT>(rem));
470 hasTail = true;
471 }
472 }
473 return {granularity, trimmedEnd, hasTail};
474}
475
476// Adaptive (stripe-based) wait dispatch for the top-level parallel_for.
477template <typename TaskSetT, typename IntegerT, typename F, typename StateContainer>
478void parallel_for_adaptiveWaitDispatch(
479 TaskSetT& taskSet,
480 StateContainer& states,
481 const ChunkedRange<IntegerT>& parRange,
482 F&& f,
483 size_t numToLaunch,
484 uint32_t minItemsPerChunk,
485 uint32_t granularity) {
486 using size_type = typename ChunkedRange<IntegerT>::size_type;
487 size_type numStripeWorkers = static_cast<size_type>(numToLaunch) + 1;
488 auto adaptiveChunkInfo =
489 parRange.calcChunkSize(numToLaunch, true, minItemsPerChunk, granularity, /*maxDynFactor=*/64);
490 auto adaptiveChunkSize = std::get<0>(adaptiveChunkInfo);
491
492 detail::StripeState<IntegerT> stripeState;
493 detail::initStripeState(
494 stripeState,
495 parRange.start,
496 parRange.end,
497 static_cast<uint32_t>(numStripeWorkers),
498 static_cast<IntegerT>(adaptiveChunkSize),
499 granularity);
500 auto stateBegin = states.begin();
501 auto worker = [&stripeState, &f](auto& userState, uint32_t myIdx) {
502 auto recurseInfo = detail::PerPoolPerThreadInfo::parForRecurse();
503 detail::runStripeWorker(stripeState, myIdx, userState, f);
504 };
505 if (numToLaunch > 0) {
506 taskSet.scheduleBulk(numToLaunch, [stateBegin, worker](size_t idx) {
507 auto stateIt = stateBegin;
508 std::advance(stateIt, static_cast<ptrdiff_t>(idx));
509 uint32_t myIdx = static_cast<uint32_t>(idx);
510 return [&userState = *stateIt, myIdx, worker]() { worker(userState, myIdx); };
511 });
512 }
513 auto callerIt = states.begin();
514 std::advance(callerIt, static_cast<ptrdiff_t>(numToLaunch));
515 worker(*callerIt, static_cast<uint32_t>(numToLaunch));
516 taskSet.wait();
517}
518
519} // namespace detail
520
521} // namespace dispenso
522
523// Implementation detail headers — included after all shared types are defined.
524// These are not standalone headers; they depend on types defined above.
525#include "detail/par_for_dynamic.h"
526#include "detail/par_for_static.h"
527
528namespace dispenso {
529
549template <
550 typename TaskSetT,
551 typename IntegerT,
552 typename F,
553 typename StateContainer,
554 typename StateGen>
556 TaskSetT& taskSet,
557 StateContainer& states,
558 const StateGen& defaultState,
559 const ChunkedRange<IntegerT>& range,
560 F&& f,
561 ParForOptions options = {}) {
562 if (range.empty()) {
563 if (options.wait) {
564 taskSet.wait();
565 }
566 return;
567 }
568
569 using size_type = typename ChunkedRange<IntegerT>::size_type;
570
571 auto granInfo = detail::computeGranularity(range, options.granularity);
572 uint32_t granularity = granInfo.granularity;
573 IntegerT trimmedEnd = granInfo.trimmedEnd;
574 bool hasTail = granInfo.hasTail;
575
576 auto runTail = [&]() {
577 if (hasTail) {
578 f(*states.begin(), trimmedEnd, range.end);
579 }
580 };
581
582 uint32_t minItemsPerChunk = std::max<uint32_t>(1, options.minItemsPerChunk);
583 size_type maxThreads = std::max<int32_t>(options.maxThreads, 1);
584 bool isStatic = range.isStatic();
585
586 ChunkedRange<IntegerT> parRange = range;
587 parRange.end = trimmedEnd;
588
589 const size_type N = taskSet.numPoolThreads();
590
591 // If the parallel portion is empty (entire range is a sub-granularity tail),
592 // or there's no pool / recursion, run everything inline.
593 if (parRange.empty() || N == 0 ||
594 detail::PerPoolPerThreadInfo::isParForRecursive(&taskSet.pool())) {
595 detail::initStates(states, defaultState, 1, options.reuseExistingState);
596 f(*states.begin(), range.start, range.end);
597 if (options.wait) {
598 taskSet.wait();
599 }
600 return;
601 }
602
603 auto chunkSizing =
604 detail::adjustChunkSizing(parRange, maxThreads, isStatic, minItemsPerChunk, N, options.wait);
605 maxThreads = chunkSizing.maxThreads;
606 isStatic = chunkSizing.isStatic;
607
608 if (maxThreads < 2) {
609 detail::initStates(states, defaultState, 1, options.reuseExistingState);
610 f(*states.begin(), range.start, range.end);
611 if (options.wait) {
612 taskSet.wait();
613 }
614 return;
615 }
616
617 if (isStatic) {
618 detail::parallel_for_staticImpl(
619 taskSet,
620 states,
621 defaultState,
622 parRange,
623 std::forward<F>(f),
624 static_cast<ssize_t>(maxThreads),
625 options.wait,
626 options.reuseExistingState,
627 granularity);
628 runTail();
629 return;
630 }
631
632 const size_type numToLaunch = std::min<size_type>(maxThreads - options.wait, N);
633
634 detail::initStates(
635 states,
636 defaultState,
637 static_cast<size_t>(numToLaunch + options.wait),
638 options.reuseExistingState);
639
640 bool useAdaptive = range.chunk == 0;
641
642 auto chunkInfo = parRange.calcChunkSize(numToLaunch, options.wait, minItemsPerChunk, granularity);
643 auto chunkSize = std::get<0>(chunkInfo);
644 auto numChunks = std::get<1>(chunkInfo);
645
646 if (useAdaptive && options.wait) {
647 detail::parallel_for_adaptiveWaitDispatch(
648 taskSet,
649 states,
650 parRange,
651 std::forward<F>(f),
652 static_cast<size_t>(numToLaunch),
653 minItemsPerChunk,
654 granularity);
655 runTail();
656 return;
657 }
658
659 if (options.wait) {
660 alignas(kCacheLineSize) std::atomic<decltype(numChunks)> index(0);
661 detail::parallel_for_dynamicImpl(
662 taskSet,
663 states,
664 parRange.start,
665 parRange.end,
666 std::forward<F>(f),
667 static_cast<size_t>(numToLaunch),
668 chunkSize,
669 numChunks,
670 index,
671 [](auto) {},
672 options.wait);
673 runTail();
674 } else {
675 detail::parallel_for_dynamicNoWaitDispatch(
676 taskSet,
677 states,
678 parRange,
679 std::forward<F>(f),
680 static_cast<size_t>(numToLaunch),
681 chunkSize,
682 numChunks,
683 range.end,
684 hasTail);
685 }
686}
687
697template <typename TaskSetT, typename IntegerT, typename F>
698DISPENSO_REQUIRES(ParallelForRangeFunc<F, IntegerT>)
699void parallel_for(
700 TaskSetT& taskSet,
701 const ChunkedRange<IntegerT>& range,
702 F&& f,
703 ParForOptions options = {}) {
704 detail::NoOpContainer container;
705 parallel_for(
706 taskSet,
707 container,
708 detail::NoOpStateGen(),
709 range,
710 [f = std::move(f)](int /*noop*/, auto i, auto j) { f(i, j); },
711 options);
712}
713
723template <typename IntegerT, typename F>
724DISPENSO_REQUIRES(ParallelForRangeFunc<F, IntegerT>)
725void parallel_for(const ChunkedRange<IntegerT>& range, F&& f, ParForOptions options = {}) {
726 TaskSet taskSet(globalThreadPool());
727 options.wait = true;
728 parallel_for(taskSet, range, std::forward<F>(f), options);
729}
730
748template <typename F, typename IntegerT, typename StateContainer, typename StateGen>
750 StateContainer& states,
751 const StateGen& defaultState,
752 const ChunkedRange<IntegerT>& range,
753 F&& f,
754 ParForOptions options = {}) {
755 TaskSet taskSet(globalThreadPool());
756 options.wait = true;
757 parallel_for(taskSet, states, defaultState, range, std::forward<F>(f), options);
758}
759
770#if DISPENSO_HAS_CONCEPTS
771template <typename TaskSetT, std::integral IntegerA, std::integral IntegerB, typename F>
772 requires std::invocable<F, IntegerA>
773#else
774template <
775 typename TaskSetT,
776 typename IntegerA,
777 typename IntegerB,
778 typename F,
779 std::enable_if_t<std::is_integral<IntegerA>::value, bool> = true,
780 std::enable_if_t<std::is_integral<IntegerB>::value, bool> = true,
781 std::enable_if_t<detail::CanInvoke<F(IntegerA)>::value, bool> = true>
782#endif
784 TaskSetT& taskSet,
785 IntegerA start,
786 IntegerB end,
787 F&& f,
788 ParForOptions options = {}) {
789 using IntegerT = std::common_type_t<IntegerA, IntegerB>;
790
791 auto range = makeChunkedRange(start, end, options.defaultChunking);
792 parallel_for(
793 taskSet,
794 range,
795 [f = std::move(f)](IntegerT s, IntegerT e) {
796 for (IntegerT i = s; i < e; ++i) {
797 f(i);
798 }
799 },
800 options);
801}
802
804#if DISPENSO_HAS_CONCEPTS
805template <typename TaskSetT, std::integral IntegerA, std::integral IntegerB, typename F>
806 requires std::invocable<F, IntegerA, IntegerB>
807#else
808template <
809 typename TaskSetT,
810 typename IntegerA,
811 typename IntegerB,
812 typename F,
813 std::enable_if_t<std::is_integral<IntegerA>::value, bool> = true,
814 std::enable_if_t<std::is_integral<IntegerB>::value, bool> = true,
815 std::enable_if_t<detail::CanInvoke<F(IntegerA, IntegerB)>::value, bool> = true>
816#endif
817void parallel_for(
818 TaskSetT& taskSet,
819 IntegerA start,
820 IntegerB end,
821 F&& f,
822 ParForOptions options = {}) {
823 auto range = makeChunkedRange(start, end, options.defaultChunking);
824 parallel_for(taskSet, range, std::forward<F>(f), options);
825}
826
837#if DISPENSO_HAS_CONCEPTS
838template <std::integral IntegerA, std::integral IntegerB, typename F>
839#else
840template <
841 typename IntegerA,
842 typename IntegerB,
843 typename F,
844 std::enable_if_t<std::is_integral<IntegerA>::value, bool> = true,
845 std::enable_if_t<std::is_integral<IntegerB>::value, bool> = true>
846#endif
847void parallel_for(IntegerA start, IntegerB end, F&& f, ParForOptions options = {}) {
848 TaskSet taskSet(globalThreadPool());
849 options.wait = true;
850 parallel_for(taskSet, start, end, std::forward<F>(f), options);
851}
852
871#if DISPENSO_HAS_CONCEPTS
872template <
873 typename TaskSetT,
874 std::integral IntegerA,
875 std::integral IntegerB,
876 typename F,
877 typename StateContainer,
878 typename StateGen>
879 requires std::invocable<F, typename StateContainer::reference, IntegerA>
880#else
881template <
882 typename TaskSetT,
883 typename IntegerA,
884 typename IntegerB,
885 typename F,
886 typename StateContainer,
887 typename StateGen,
888 std::enable_if_t<std::is_integral<IntegerA>::value, bool> = true,
889 std::enable_if_t<std::is_integral<IntegerB>::value, bool> = true,
890 std::enable_if_t<
891 detail::CanInvoke<F(typename StateContainer::reference, IntegerA)>::value,
892 bool> = true>
893#endif
895 TaskSetT& taskSet,
896 StateContainer& states,
897 const StateGen& defaultState,
898 IntegerA start,
899 IntegerB end,
900 F&& f,
901 ParForOptions options = {}) {
902 using IntegerT = std::common_type_t<IntegerA, IntegerB>;
903 auto range = makeChunkedRange(start, end, options.defaultChunking);
904 parallel_for(
905 taskSet,
906 states,
907 defaultState,
908 range,
909 [f = std::move(f)](auto& state, IntegerT s, IntegerT e) {
910 for (IntegerT i = s; i < e; ++i) {
911 f(state, i);
912 }
913 },
914 options);
915}
916
918#if DISPENSO_HAS_CONCEPTS
919template <
920 typename TaskSetT,
921 std::integral IntegerA,
922 std::integral IntegerB,
923 typename F,
924 typename StateContainer,
925 typename StateGen>
926 requires std::invocable<F, typename StateContainer::reference, IntegerA, IntegerB>
927#else
928template <
929 typename TaskSetT,
930 typename IntegerA,
931 typename IntegerB,
932 typename F,
933 typename StateContainer,
934 typename StateGen,
935 std::enable_if_t<std::is_integral<IntegerA>::value, bool> = true,
936 std::enable_if_t<std::is_integral<IntegerB>::value, bool> = true,
937 std::enable_if_t<
938 detail::CanInvoke<F(typename StateContainer::reference, IntegerA, IntegerB)>::value,
939 bool> = true>
940#endif
941void parallel_for(
942 TaskSetT& taskSet,
943 StateContainer& states,
944 const StateGen& defaultState,
945 IntegerA start,
946 IntegerB end,
947 F&& f,
948 ParForOptions options = {}) {
949 auto range = makeChunkedRange(start, end, options.defaultChunking);
950 parallel_for(taskSet, states, defaultState, range, std::forward<F>(f), options);
951}
952
972#if DISPENSO_HAS_CONCEPTS
973template <
974 std::integral IntegerA,
975 std::integral IntegerB,
976 typename F,
977 typename StateContainer,
978 typename StateGen>
979#else
980template <
981 typename IntegerA,
982 typename IntegerB,
983 typename F,
984 typename StateContainer,
985 typename StateGen,
986 std::enable_if_t<std::is_integral<IntegerA>::value, bool> = true,
987 std::enable_if_t<std::is_integral<IntegerB>::value, bool> = true>
988#endif
990 StateContainer& states,
991 const StateGen& defaultState,
992 IntegerA start,
993 IntegerB end,
994 F&& f,
995 ParForOptions options = {}) {
996 TaskSet taskSet(globalThreadPool());
997 options.wait = true;
998 parallel_for(taskSet, states, defaultState, start, end, std::forward<F>(f), options);
999}
1000
1001} // namespace dispenso
void parallel_for(TaskSetT &taskSet, StateContainer &states, const StateGen &defaultState, const ChunkedRange< IntegerT > &range, F &&f, ParForOptions options={})
ChunkedRange< std::common_type_t< IntegerA, IntegerB > > makeChunkedRange(IntegerA start, IntegerB end, ParForChunking chunking=ParForChunking::kStatic)
constexpr size_t kCacheLineSize
A constant that defines a safe number of bytes+alignment to avoid false sharing.
Definition platform.h:125
ParForChunking defaultChunking