dispenso/mpmc__ring__buffer_8h_source.html

/*

 * Copyright (c) Meta Platforms, Inc. and affiliates.

 *

 * This source code is licensed under the MIT license found in the

 * LICENSE file in the root directory of this source tree.

 */


#pragma once


#include <atomic>

#include <cstddef>

#include <cstdint>

#include <new>

#include <type_traits>

#include <utility>


#include <dispenso/platform.h>

#include <dispenso/util.h>


namespace dispenso {


template <typename T, size_t Capacity = 16, bool RoundUpToPowerOfTwo = true>

#if DISPENSO_HAS_CONCEPTS

  requires std::move_constructible<T> && std::is_nothrow_move_constructible_v<T>

#endif


class MpmcRingBuffer {

  // Compile-time preconditions on the template arguments.
  // A requested capacity below 2 is rejected outright.
  static_assert(Capacity >= 2, "MpmcRingBuffer capacity must be at least 2");

#if !DISPENSO_HAS_CONCEPTS

  // When DISPENSO_HAS_CONCEPTS is false, the element-type requirements (move-constructible
  // and nothrow-move-constructible) are enforced with static_asserts so that a violation
  // still produces a readable diagnostic.
  static_assert(

      std::is_move_constructible<T>::value,

      "MpmcRingBuffer element type must be move-constructible");

  static_assert(

      std::is_nothrow_move_constructible<T>::value,

      "MpmcRingBuffer element type must be nothrow-move-constructible");

#endif


 public:

  /** Type of the elements stored in the buffer. */
  using value_type = T;


  /** Unsigned integer type used for element counts and capacities. */
  using size_type = size_t;


  MpmcRingBuffer() {

    for (size_t i = 0; i < kBufferSize; ++i) {

      slots_[i].seq.store(i, std::memory_order_relaxed);

    }

  }


  /** Ring buffers are not copyable. */
  MpmcRingBuffer(const MpmcRingBuffer&) = delete;


  /** Ring buffers are not copy-assignable. */
  MpmcRingBuffer& operator=(const MpmcRingBuffer&) = delete;


  /** Ring buffers are not movable. */
  MpmcRingBuffer(MpmcRingBuffer&&) = delete;


  /** Ring buffers are not move-assignable. */
  MpmcRingBuffer& operator=(MpmcRingBuffer&&) = delete;


  /**
   * Destroys the ring buffer together with any elements still stored in it.
   *
   * The caller must guarantee that no other thread is using the buffer any more; under that
   * precondition relaxed loads of the two counters are sufficient. Every position in
   * [head, tail) is treated as holding a live element, and those elements are destroyed
   * oldest-first.
   */
  ~MpmcRingBuffer() {
    size_t cursor = head_.load(std::memory_order_relaxed);
    const size_t end = tail_.load(std::memory_order_relaxed);
    for (; cursor != end; ++cursor) {
      dataPtr(slots_[wrapIndex(cursor)])->~T();
    }
  }


  /**
   * Attempts to push an element into the buffer by moving.
   *
   * Forwards to emplaceImpl(), which makes a single attempt to claim the tail slot.
   *
   * @param item The element to move into the buffer. It is only moved from when the push
   *             succeeds; std::move here is just a cast, and emplaceImpl() constructs the
   *             stored element only after it has claimed a slot.
   * @return true if the element was stored, false if no slot could be claimed.
   */
  bool try_push(T&& item) {

    return emplaceImpl(std::move(item));

  }


#if DISPENSO_HAS_CONCEPTS

  /**
   * Attempts to push an element into the buffer by copying.
   *
   * Only available when T is nothrow-copy-constructible (expressed as a requires-clause).
   *
   * @param item The element to copy into the buffer.
   * @return true if the copy was stored, false if no slot could be claimed.
   */
  bool try_push(const T& item)

    requires std::is_nothrow_copy_constructible_v<T>

  {

    return emplaceImpl(item);

  }

#else

  /**
   * Attempts to push an element into the buffer by copying.
   *
   * Pre-concepts spelling of the same constraint: the defaulted template parameter U = T
   * makes the enable_if depend on a deduced context, so the overload is removed from
   * overload resolution (rather than producing a hard error) when T is not
   * nothrow-copy-constructible.
   *
   * @param item The element to copy into the buffer.
   * @return true if the copy was stored, false if no slot could be claimed.
   */
  template <typename U = T, std::enable_if_t<std::is_nothrow_copy_constructible<U>::value, int> = 0>


  bool try_push(const T& item) {

    return emplaceImpl(item);

  }


#endif


#if DISPENSO_HAS_CONCEPTS

  /**
   * Attempts to construct an element in-place in the buffer.
   *
   * Only available when T is nothrow-constructible from Args... (requires-clause).
   *
   * @param args Constructor arguments, perfectly forwarded to T's constructor.
   * @return true if the element was constructed in the buffer, false if no slot could be
   *         claimed (in which case no T is constructed).
   */
  template <typename... Args>

    requires std::is_nothrow_constructible_v<T, Args...>

  bool try_emplace(Args&&... args) {

    return emplaceImpl(std::forward<Args>(args)...);

  }

#else

  /**
   * Attempts to construct an element in-place in the buffer.
   *
   * Pre-concepts spelling of the same constraint: the overload is SFINAE-disabled unless T
   * is nothrow-constructible from Args....
   *
   * @param args Constructor arguments, perfectly forwarded to T's constructor.
   * @return true if the element was constructed in the buffer, false if no slot could be
   *         claimed (in which case no T is constructed).
   */
  template <

      typename... Args,

      std::enable_if_t<std::is_nothrow_constructible<T, Args...>::value, int> = 0>


  bool try_emplace(Args&&... args) {

    return emplaceImpl(std::forward<Args>(args)...);

  }


#endif


  /**
   * Attempts to pop the oldest element, move-assigning it into an existing object.
   *
   * A single attempt is made; there is no retry loop.
   *
   * @param item Receives the popped element by move-assignment. Left untouched on failure.
   * @return true if an element was popped. false if the buffer looked empty, the element at
   *         the head has not been published by its producer yet, or another consumer claimed
   *         the head position first.
   */
  bool try_pop(T& item) {
    // The element is move-assigned only after the head position has been claimed. A throwing
    // assignment at that point would skip the sequence-number store that hands the slot back
    // to producers, and the element would never be destroyed because the destructor only
    // walks [head, tail). Hence nothrow move-assignment is mandatory for this overload;
    // try_pop() and try_pop_into() move-construct instead and have no such requirement.
    static_assert(
        std::is_nothrow_move_assignable<T>::value,
        "MpmcRingBuffer::try_pop(T&) requires a nothrow-move-assignable T; "
        "use try_pop() or try_pop_into() for nothrow-move-constructible-only types");

    size_t pos = head_.load(std::memory_order_relaxed);

    // Cheap emptiness test on the counters before paying for the acquire load on the slot;
    // this is the path taken by callers that poll an empty buffer.
    if (pos == tail_.load(std::memory_order_relaxed)) {
      return false;
    }

    Slot& cell = slots_[wrapIndex(pos)];
    const size_t published = pos + 1;

    // A producer marks the element at `pos` as ready by storing pos + 1 into the slot.
    if (cell.seq.load(std::memory_order_acquire) != published) {
      return false;
    }

    // Claim the position; losing this race means another consumer owns the element.
    if (!head_.compare_exchange_strong(pos, published, std::memory_order_relaxed)) {
      return false;
    }

    T* source = dataPtr(cell);
    item = std::move(*source);
    source->~T();

    // Hand the slot back to producers for the next lap around the ring.
    cell.seq.store(pos + kBufferSize, std::memory_order_release);
    return true;
  }


  /**
   * Attempts to pop the oldest element, returning it wrapped in an OpResult.
   *
   * A single attempt is made; there is no retry loop. The element is move-constructed into
   * the result.
   *
   * @return An OpResult holding the popped element on success. A default-constructed
   *         OpResult if the buffer looked empty, the element at the head has not been
   *         published yet, or another consumer claimed the head position first.
   */
  OpResult<T> try_pop() {
    size_t pos = head_.load(std::memory_order_relaxed);

    // Cheap emptiness test on the counters before touching the slot.
    if (pos == tail_.load(std::memory_order_relaxed)) {
      return {};
    }

    Slot& cell = slots_[wrapIndex(pos)];
    const size_t published = pos + 1;

    // The element must have been published (seq == pos + 1) and this thread must win the
    // race for the head position; the CAS is only attempted when the first check passes.
    if (cell.seq.load(std::memory_order_acquire) != published ||
        !head_.compare_exchange_strong(pos, published, std::memory_order_relaxed)) {
      return {};
    }

    T* source = dataPtr(cell);
    OpResult<T> popped(std::move(*source));
    source->~T();

    // Hand the slot back to producers for the next lap around the ring.
    cell.seq.store(pos + kBufferSize, std::memory_order_release);
    return popped;
  }


  /**
   * Attempts to pop the oldest element into uninitialized storage.
   *
   * A single attempt is made; there is no retry loop.
   *
   * @param storage Destination in which a T is created with placement new by
   *                move-constructing from the buffered element. It is written only on
   *                success and is not checked for null.
   * @return true if an element was popped. false if the buffer looked empty, the element at
   *         the head has not been published yet, or another consumer claimed the head
   *         position first.
   */
  bool try_pop_into(T* storage) {
    size_t pos = head_.load(std::memory_order_relaxed);

    // Cheap emptiness test on the counters before touching the slot.
    if (pos == tail_.load(std::memory_order_relaxed)) {
      return false;
    }

    Slot& cell = slots_[wrapIndex(pos)];
    const size_t published = pos + 1;

    // A producer marks the element at `pos` as ready by storing pos + 1 into the slot.
    if (cell.seq.load(std::memory_order_acquire) != published) {
      return false;
    }

    // Claim the position; losing this race means another consumer owns the element.
    if (!head_.compare_exchange_strong(pos, published, std::memory_order_relaxed)) {
      return false;
    }

    T* source = dataPtr(cell);
    new (storage) T(std::move(*source));
    source->~T();

    // Hand the slot back to producers for the next lap around the ring.
    cell.seq.store(pos + kBufferSize, std::memory_order_release);
    return true;
  }


  /**
   * Attempts to push multiple elements into the buffer with a single reservation.
   *
   * Starting at the current tail, the longest run of free slots (at most @p count, and at
   * most the buffer size) is reserved with one compare-exchange, then filled by moving from
   * the leading elements of @p items. A single attempt is made; there is no retry loop.
   *
   * @param items Array of at least @p count elements. The first N are moved from, where N
   *              is the return value; the rest are left untouched.
   * @param count Number of elements the caller would like to push.
   * @return The number of elements pushed. 0 if @p count is 0, if the tail slot is not
   *         free, or if another producer advanced the tail before the reservation.
   */
  size_type try_push_batch(T* items, size_type count) {
    if (count == 0) {
      return 0;
    }
    // Never try to reserve more than one full lap of the ring.
    if (count > kBufferSize) {
      count = kBufferSize;
    }

    size_t base = tail_.load(std::memory_order_relaxed);

    // Count how many consecutive slots from the tail are free. A slot is free for
    // position p exactly when its sequence number equals p.
    size_type reservable = 0;
    while (reservable < count) {
      const size_t pos = base + reservable;
      if (slots_[wrapIndex(pos)].seq.load(std::memory_order_acquire) != pos) {
        break;
      }
      ++reservable;
    }
    if (reservable == 0) {
      return 0;
    }

    // Reserve the whole run at once; failure means another producer moved the tail.
    if (!tail_.compare_exchange_strong(base, base + reservable, std::memory_order_relaxed)) {
      return 0;
    }

    for (size_type i = 0; i < reservable; ++i) {
      const size_t pos = base + i;
      Slot& cell = slots_[wrapIndex(pos)];
      new (dataPtr(cell)) T(std::move(items[i]));
      // Publish each element individually so consumers can start on it immediately.
      cell.seq.store(pos + 1, std::memory_order_release);
    }
    return reservable;
  }


  bool empty() const {

    size_t head = head_.load(std::memory_order_relaxed);

    size_t tail = tail_.load(std::memory_order_relaxed);

    return head == tail;

  }


  /**
   * Checks if the buffer is full.
   *
   * The two counters are read with independent relaxed loads, so under concurrent use the
   * answer is only a hint and may already be out of date when it is returned.
   *
   * @return true if the observed number of occupied slots is at least the buffer size.
   */
  bool full() const {
    const size_t head = head_.load(std::memory_order_relaxed);
    const size_t tail = tail_.load(std::memory_order_relaxed);
    // The two relaxed loads are not a consistent snapshot: the observed tail may be older
    // than the observed head. An unsigned `tail - head` would then wrap to a huge value and
    // report a nearly empty buffer as full. Interpreting the distance as signed makes such
    // a torn snapshot compare as "not full" instead.
    const intptr_t occupied = static_cast<intptr_t>(tail - head);
    return occupied >= static_cast<intptr_t>(kBufferSize);
  }


  /**
   * Returns the approximate number of elements in the buffer.
   *
   * The two counters are read with independent relaxed loads, so under concurrent use the
   * value is only an estimate. It is always within [0, capacity()].
   *
   * @return The observed number of occupied slots, clamped to the valid range.
   */
  size_type size() const {
    const size_t head = head_.load(std::memory_order_relaxed);
    const size_t tail = tail_.load(std::memory_order_relaxed);
    // The two relaxed loads are not a consistent snapshot. If the observed tail is older
    // than the observed head, an unsigned `tail - head` wraps to a value near SIZE_MAX; if
    // the observed head is stale, the difference can exceed the buffer size. Treat the
    // distance as signed and clamp it so callers never see an impossible count.
    const intptr_t occupied = static_cast<intptr_t>(tail - head);
    if (occupied <= 0) {
      return 0;
    }
    if (static_cast<size_t>(occupied) > kBufferSize) {
      return kBufferSize;
    }
    return static_cast<size_type>(occupied);
  }


  /**
   * Returns the maximum number of elements the buffer can hold.
   *
   * This is the internal buffer size (kBufferSize), which is derived from the Capacity
   * template argument by computeBufferSize() and may therefore differ from Capacity.
   *
   * @return The number of slots in the ring.
   */
  static constexpr size_type capacity() noexcept {

    return kBufferSize;

  }


 private:

  // Derives the number of slots from the template arguments: Capacity itself when rounding
  // is disabled, otherwise the result of detail::nextPow2(Capacity).
  static constexpr size_t computeBufferSize() noexcept {
    if (!RoundUpToPowerOfTwo) {
      return Capacity;
    }
    return static_cast<size_t>(detail::nextPow2(Capacity));
  }


  // Number of slots in the ring; this is also the value reported by capacity().
  static constexpr size_t kBufferSize = computeBufferSize();

  // Sanity check on computeBufferSize(): when rounding was requested the result must really
  // be a power of two.
  static_assert(

      (kBufferSize & (kBufferSize - 1)) == 0 || !RoundUpToPowerOfTwo,

      "Internal error: kBufferSize must be power of two when RoundUpToPowerOfTwo is true");

  // True when kBufferSize is a power of two (this can also hold with rounding disabled, if
  // Capacity already is one). Selects the masking path in wrapIndex().
  static constexpr bool kIsPow2 = (kBufferSize & (kBufferSize - 1)) == 0;

  // Index mask used by wrapIndex() on the power-of-two path; it is only consulted when
  // kIsPow2 is true.
  static constexpr size_t kMask = kBufferSize - 1;


  // Maps a monotonically increasing position counter onto a slot index in
  // [0, kBufferSize): a bit-mask when the buffer size is a power of two, a modulo otherwise.
  static size_t wrapIndex(size_t i) {
    if (kIsPow2) {
      return i & kMask;
    }
    return i % kBufferSize;
  }


  // Shared single-slot fast path for try_push() and try_emplace(): reserve one slot with a single

  // CAS on the tail, then construct T in place from the forwarded arguments. Intentionally

  // unconstrained -- the public overloads carry the nothrow-construction constraints; this just

  // centralizes the (otherwise identical) algorithm so it lives in one place. Fully inlined, so

  // the forwarding adds no runtime cost on the hot path.

  template <typename... Args>

  bool emplaceImpl(Args&&... args) {

    size_t tail = tail_.load(std::memory_order_relaxed);

    Slot& slot = slots_[wrapIndex(tail)];

    size_t seq = slot.seq.load(std::memory_order_acquire);

    intptr_t diff = static_cast<intptr_t>(seq) - static_cast<intptr_t>(tail);

    if (diff == 0) {

      // ABA-free: tail_ is a monotonic 64-bit counter, so a successful CAS proves no other

      // producer claimed this position since the load (see "Correctness & ABA-freedom" above).

      // Fail-fast: a single attempt, no retry loop -- contention returns false, not corruption.

      if (tail_.compare_exchange_strong(tail, tail + 1, std::memory_order_relaxed)) {

        new (dataPtr(slot)) T(std::forward<Args>(args)...);

        slot.seq.store(tail + 1, std::memory_order_release);

        return true;

      }

    }

    return false;

  }


  // One ring entry: raw storage for a single T plus the sequence number that hands the
  // entry back and forth between producers and consumers.
  //
  // The element storage is declared first. Placing the (possibly over-aligned) `data` ahead
  // of `seq` keeps any padding required by T's alignment from landing *between* the two
  // members, which could otherwise tip a slot that would have fit in one cache line over
  // the boundary. The alignas(kCacheLineSize) on the struct itself aligns every slot to a
  // cache line and rounds its size up to a cache-line multiple, so neighboring slots do not
  // share a line (no false sharing between them).
  struct alignas(kCacheLineSize) Slot {

    // Uninitialized, suitably aligned storage; a T is created in it with placement new and
    // destroyed explicitly.
    alignas(T) char data[sizeof(T)];

    // Sequence number of the slot: equal to position p while the slot is free for p,
    // p + 1 once the element pushed at p is ready to be popped.
    std::atomic<size_t> seq;

  };


  // Returns the slot's raw storage viewed as a T*. The pointer is only meaningful to
  // dereference while the slot actually holds a constructed element; it is also used as the
  // placement-new target when an element is created.
  T* dataPtr(Slot& slot) {

    return reinterpret_cast<T*>(slot.data);

  }


  // Const overload of the accessor above.
  const T* dataPtr(const Slot& slot) const {

    return reinterpret_cast<const T*>(slot.data);

  }


  // Position of the next element to pop. Advanced only by consumers, via compare-exchange.
  // Aligned to kCacheLineSize so it starts on its own cache-line boundary, apart from tail_.
  alignas(kCacheLineSize) std::atomic<size_t> head_{0};


  // Position at which the next element will be pushed. Advanced only by producers, via
  // compare-exchange. Aligned to kCacheLineSize for the same reason as head_.
  alignas(kCacheLineSize) std::atomic<size_t> tail_{0};


  // The ring storage itself: kBufferSize slots, indexed through wrapIndex().
  Slot slots_[kBufferSize];

};


} // namespace dispenso

dispenso::MpmcRingBuffer
A lock-free multi-producer multi-consumer ring buffer with fixed capacity.
Definition mpmc_ring_buffer.h:171

dispenso::MpmcRingBuffer::try_push_batch
size_type try_push_batch(T *items, size_type count)
Attempts to push multiple elements into the buffer.
Definition mpmc_ring_buffer.h:496

dispenso::MpmcRingBuffer::try_emplace
bool try_emplace(Args &&... args)
Attempts to construct an element in-place in the buffer.
Definition mpmc_ring_buffer.h:330

dispenso::MpmcRingBuffer::try_push
bool try_push(const T &item)
Attempts to push an element into the buffer by copying.
Definition mpmc_ring_buffer.h:294

dispenso::MpmcRingBuffer::try_pop
bool try_pop(T &item)
Attempts to pop an element from the buffer.
Definition mpmc_ring_buffer.h:359

dispenso::MpmcRingBuffer::MpmcRingBuffer
MpmcRingBuffer()
Constructs an empty ring buffer.
Definition mpmc_ring_buffer.h:202

dispenso::MpmcRingBuffer::full
bool full() const
Checks if the buffer is full.
Definition mpmc_ring_buffer.h:555

dispenso::MpmcRingBuffer::try_pop
OpResult< T > try_pop()
Attempts to pop an element from the buffer, returning an optional.
Definition mpmc_ring_buffer.h:413

dispenso::MpmcRingBuffer::size
size_type size() const
Returns the approximate number of elements in the buffer.
Definition mpmc_ring_buffer.h:570

dispenso::MpmcRingBuffer::MpmcRingBuffer
MpmcRingBuffer(const MpmcRingBuffer &)=delete
Ring buffers are not copyable.

dispenso::MpmcRingBuffer::~MpmcRingBuffer
~MpmcRingBuffer()
Destroys the ring buffer.
Definition mpmc_ring_buffer.h:236

dispenso::MpmcRingBuffer::capacity
static constexpr size_type capacity() noexcept
Returns the maximum number of elements the buffer can hold.
Definition mpmc_ring_buffer.h:584

dispenso::MpmcRingBuffer::try_pop_into
bool try_pop_into(T *storage)
Attempts to pop an element into uninitialized storage.
Definition mpmc_ring_buffer.h:448

dispenso::MpmcRingBuffer::MpmcRingBuffer
MpmcRingBuffer(MpmcRingBuffer &&)=delete
Ring buffers are not movable.

dispenso::MpmcRingBuffer::empty
bool empty() const
Checks if the buffer is empty.
Definition mpmc_ring_buffer.h:541

dispenso::MpmcRingBuffer::try_push
bool try_push(T &&item)
Attempts to push an element into the buffer by moving.
Definition mpmc_ring_buffer.h:272

platform.h

dispenso::kCacheLineSize
constexpr size_t kCacheLineSize
A constant that defines a safe number of bytes+alignment to avoid false sharing.
Definition platform.h:125

util.h